diff --git a/.github/workflows/build-ci-container.yml b/.github/workflows/build-ci-container.yml
index 85ecc82fa6490..8a81d47186469 100644
--- a/.github/workflows/build-ci-container.yml
+++ b/.github/workflows/build-ci-container.yml
@@ -27,6 +27,7 @@ jobs:
       container-name-tag: ${{ steps.vars.outputs.container-name-tag }}
       container-name-agent-tag: ${{ steps.vars.outputs.container-name-agent-tag }}
       container-filename: ${{ steps.vars.outputs.container-filename }}
+      container-agent-filename: ${{ steps.vars.outputs.container-agent-filename }}
     steps:
       - name: Checkout LLVM
         uses: actions/checkout@v4
@@ -42,6 +43,7 @@ jobs:
           echo "container-name-tag=$container_name:$tag" >> $GITHUB_OUTPUT
           echo "container-name-agent-tag=$container_name-agent:$tag" >> $GITHUB_OUTPUT
           echo "container-filename=$(echo $container_name:$tag  | sed -e 's/\//-/g' -e 's/:/-/g').tar" >> $GITHUB_OUTPUT
+          echo "container-agent-filename=$(echo $container_name-agent:$tag  | sed -e 's/\//-/g' -e 's/:/-/g').tar" >> $GITHUB_OUTPUT
       - name: Build container
         working-directory: ./.github/workflows/containers/github-action-ci/
         run: |
@@ -53,13 +55,14 @@ jobs:
       # maintain minimal permissions while building the container.
       - name: Save container image
         run: |
-          podman save  ${{ steps.vars.outputs.container-name-tag }} ${{ steps.vars.outputs.container-name-agent-tag }} >  ${{ steps.vars.outputs.container-filename }}
+          podman save ${{ steps.vars.outputs.container-name-tag }}  >  ${{ steps.vars.outputs.container-filename }}
+          podman save ${{ steps.vars.outputs.container-name-agent-tag }} > ${{ steps.vars.outputs.container-agent-filename }}
 
       - name: Upload container image
         uses: actions/upload-artifact@v4
         with:
           name: container
-          path: ${{ steps.vars.outputs.container-filename }}
+          path: "*.tar"
           retention-days: 14
 
       - name: Test Container
@@ -92,6 +95,7 @@ jobs:
           podman push ${{ needs.build-ci-container.outputs.container-name-tag }}
           podman push ${{ needs.build-ci-container.outputs.container-name }}:latest
 
+          podman load -i ${{ needs.build-ci-container.outputs.container-agent-filename }}
           podman tag ${{ needs.build-ci-container.outputs.container-name-agent-tag }} ${{ needs.build-ci-container.outputs.container-name-agent }}:latest
           podman push ${{ needs.build-ci-container.outputs.container-name-agent-tag }}
           podman push ${{ needs.build-ci-container.outputs.container-name-agent }}:latest
diff --git a/.github/workflows/libclang-abi-tests.yml b/.github/workflows/libclang-abi-tests.yml
index 9e839ff49e283..41b3075288d2d 100644
--- a/.github/workflows/libclang-abi-tests.yml
+++ b/.github/workflows/libclang-abi-tests.yml
@@ -130,7 +130,7 @@ jobs:
             sed -i 's/LLVM_[0-9]\+/LLVM_NOVERSION/' $lib-${{ matrix.ref }}.abi
           done
       - name: Upload ABI file
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # 4.6.0
         with:
           name: ${{ matrix.name }}
           path: '*${{ matrix.ref }}.abi'
@@ -143,12 +143,12 @@ jobs:
       - abi-dump
     steps:
       - name: Download baseline
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # 4.1.8
         with:
           name: build-baseline
           path: build-baseline
       - name: Download latest
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # 4.1.8
         with:
           name: build-latest
           path: build-latest
@@ -162,7 +162,7 @@ jobs:
           done
       - name: Upload ABI Comparison
         if: always()
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # 4.6.0
         with:
           name: compat-report-${{ github.sha }}
           path: compat_reports/
diff --git a/.github/workflows/libcxx-restart-preempted-jobs.yaml b/.github/workflows/libcxx-restart-preempted-jobs.yaml
index 82d84c01c92af..e7e3772d4de22 100644
--- a/.github/workflows/libcxx-restart-preempted-jobs.yaml
+++ b/.github/workflows/libcxx-restart-preempted-jobs.yaml
@@ -92,6 +92,12 @@ jobs:
                 check_run_id: check_run_id
               })
 
+              // For temporary debugging purposes to see the structure of the annotations.
+              console.log(annotations);
+
+              has_failed_job = false;
+              saved_failure_message = null;
+
               for (annotation of annotations.data) {
                 if (annotation.annotation_level != 'failure') {
                   continue;
@@ -106,15 +112,32 @@ jobs:
 
                 const failure_match = annotation.message.match(failure_regex);
                 if (failure_match != null) {
-                  // We only want to restart the workflow if all of the failures were due to preemption.
-                  // We don't want to restart the workflow if there were other failures.
-                  core.notice('Choosing not to rerun workflow because we found a non-preemption failure' +
-                    'Failure message: "' + annotation.message + '"');
-                  await create_check_run('skipped', 'Choosing not to rerun workflow because we found a non-preemption failure\n'
-                    + 'Failure message: ' + annotation.message)
-                  return;
+                  has_failed_job = true;
+                  saved_failure_message = annotation.message;
                 }
               }
+              if (has_failed_job && !has_preempted_job) {
+                // We only want to restart the workflow if all of the failures were due to preemption.
+                // We don't want to restart the workflow if there were other failures.
+                //
+                // However, libcxx runners running inside docker containers produce both a preemption message and failure message.
+                //
+                // The desired approach is to ignore failure messages which appear on the same job as a preemption message
+                // (A job is a single run with a specific configuration, e.g. generic-gcc, gcc-14).
+                //
+                // However, it's unclear that this code achieves the desired approach, and it may ignore all failures
+                // if a preemption message is found at all on any run.
+                //
+                // For now, it's more important to restart preempted workflows than to avoid restarting workflows with
+                // non-preemption failures.
+                //
+                // TODO Figure this out.
+                core.notice('Choosing not to rerun workflow because we found a non-preemption failure. ' +
+                  'Failure message: "' + saved_failure_message + '"');
+                await create_check_run('skipped', 'Choosing not to rerun workflow because we found a non-preemption failure\n'
+                  + 'Failure message: ' + saved_failure_message)
+                return;
+              }
             }
 
             if (!has_preempted_job) {
diff --git a/.github/workflows/llvm-tests.yml b/.github/workflows/llvm-tests.yml
index 26e644229aaa2..4e570a7cb1455 100644
--- a/.github/workflows/llvm-tests.yml
+++ b/.github/workflows/llvm-tests.yml
@@ -137,14 +137,14 @@ jobs:
           # Remove symbol versioning from dumps, so we can compare across major versions.
           sed -i 's/LLVM_${{ matrix.llvm_version_major }}/LLVM_NOVERSION/' ${{ matrix.ref }}.abi
       - name: Upload ABI file
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # 4.6.0
         with:
           name: ${{ matrix.name }}
           path: ${{ matrix.ref }}.abi
 
       - name: Upload symbol list file
         if: matrix.name == 'build-baseline'
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # 4.6.0
         with:
           name: symbol-list
           path: llvm.symbols
@@ -157,17 +157,17 @@ jobs:
       - abi-dump
     steps:
       - name: Download baseline
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # 4.1.8
         with:
           name: build-baseline
           path: build-baseline
       - name: Download latest
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # 4.1.8
         with:
           name: build-latest
           path: build-latest
       - name: Download symbol list
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # 4.1.8
         with:
           name: symbol-list
           path: symbol-list
@@ -186,7 +186,7 @@ jobs:
           abi-compliance-checker $EXTRA_ARGS -l libLLVM.so -old build-baseline/*.abi -new build-latest/*.abi || test "${{ needs.abi-dump-setup.outputs.ABI_HEADERS }}" = "llvm-c"
       - name: Upload ABI Comparison
         if: always()
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # 4.6.0
         with:
           name: compat-report-${{ github.sha }}
           path: compat_reports/
diff --git a/.github/workflows/premerge.yaml b/.github/workflows/premerge.yaml
index 6c7f1d81a953e..30f4fc807f3a5 100644
--- a/.github/workflows/premerge.yaml
+++ b/.github/workflows/premerge.yaml
@@ -18,11 +18,6 @@ jobs:
     concurrency:
       group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}
       cancel-in-progress: true
-    container:
-      image: ghcr.io/llvm/ci-ubuntu-22.04:latest
-    defaults:
-      run:
-        shell: bash
     steps:
       - name: Checkout LLVM
         uses: actions/checkout@v4
diff --git a/.github/workflows/release-documentation.yml b/.github/workflows/release-documentation.yml
index 922c5093f1357..09e21585bfc56 100644
--- a/.github/workflows/release-documentation.yml
+++ b/.github/workflows/release-documentation.yml
@@ -59,7 +59,7 @@ jobs:
           ./llvm/utils/release/build-docs.sh -release "${{ inputs.release-version }}" -no-doxygen
 
       - name: Create Release Notes Artifact
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # 4.6.0
         with:
           name: release-notes
           path: docs-build/html-export/
diff --git a/.github/workflows/spirv-tests.yml b/.github/workflows/spirv-tests.yml
index 34c77a398c150..ea466dc6c52e5 100644
--- a/.github/workflows/spirv-tests.yml
+++ b/.github/workflows/spirv-tests.yml
@@ -25,5 +25,5 @@ jobs:
     with:
       build_target: check-llvm-codegen-spirv
       projects:
-      extra_cmake_args: '-DLLVM_TARGETS_TO_BUILD="" -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD="SPIRV" -DLLVM_INCLUDE_SPIRV_TOOLS_TESTS=ON'
+      extra_cmake_args: '-DLLVM_TARGETS_TO_BUILD="SPIRV" -DLLVM_INCLUDE_SPIRV_TOOLS_TESTS=ON'
       os_list: '["ubuntu-22.04"]'
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 320623cfa15af..aa83d7f9b13ab 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -80,7 +80,6 @@ class DataAggregator : public DataReader {
 private:
   struct PerfBranchSample {
     SmallVector<LBREntry, 32> LBR;
-    uint64_t PC;
   };
 
   struct PerfBasicSample {
@@ -334,9 +333,6 @@ class DataAggregator : public DataReader {
   /// Process all branch events.
   void processBranchEvents();
 
-  /// This member function supports generating data for AutoFDO LLVM tools.
-  std::error_code writeAutoFDOData(StringRef OutputFilename);
-
   /// Parse the full output generated by perf script to report non-LBR samples.
   std::error_code parseBasicEvents();
 
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 2b02086e3e0c9..de9ec6c1723d5 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -108,15 +108,6 @@ TimeAggregator("time-aggr",
   cl::ZeroOrMore,
   cl::cat(AggregatorCategory));
 
-static cl::opt<bool>
-    UseEventPC("use-event-pc",
-               cl::desc("use event PC in combination with LBR sampling"),
-               cl::cat(AggregatorCategory));
-
-static cl::opt<bool> WriteAutoFDOData(
-    "autofdo", cl::desc("generate autofdo textual data instead of bolt data"),
-    cl::cat(AggregatorCategory));
-
 } // namespace opts
 
 namespace {
@@ -187,15 +178,13 @@ void DataAggregator::start() {
                       /*Wait = */false);
   } else if (!opts::ITraceAggregation.empty()) {
     std::string ItracePerfScriptArgs = llvm::formatv(
-        "script -F pid,ip,brstack --itrace={0}", opts::ITraceAggregation);
+        "script -F pid,brstack --itrace={0}", opts::ITraceAggregation);
     launchPerfProcess("branch events with itrace", MainEventsPPI,
                       ItracePerfScriptArgs.c_str(),
                       /*Wait = */ false);
   } else {
-    launchPerfProcess("branch events",
-                      MainEventsPPI,
-                      "script -F pid,ip,brstack",
-                      /*Wait = */false);
+    launchPerfProcess("branch events", MainEventsPPI, "script -F pid,brstack",
+                      /*Wait = */ false);
   }
 
   // Note: we launch script for mem events regardless of the option, as the
@@ -381,67 +370,6 @@ void DataAggregator::parsePreAggregated() {
   }
 }
 
-std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) {
-  outs() << "PERF2BOLT: writing data for autofdo tools...\n";
-  NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName,
-                     TimerGroupDesc, opts::TimeAggregator);
-
-  std::error_code EC;
-  raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
-  if (EC)
-    return EC;
-
-  // Format:
-  // number of unique traces
-  // from_1-to_1:count_1
-  // from_2-to_2:count_2
-  // ......
-  // from_n-to_n:count_n
-  // number of unique sample addresses
-  // addr_1:count_1
-  // addr_2:count_2
-  // ......
-  // addr_n:count_n
-  // number of unique LBR entries
-  // src_1->dst_1:count_1
-  // src_2->dst_2:count_2
-  // ......
-  // src_n->dst_n:count_n
-
-  const uint64_t FirstAllocAddress = this->BC->FirstAllocAddress;
-
-  // AutoFDO addresses are relative to the first allocated loadable program
-  // segment
-  auto filterAddress = [&FirstAllocAddress](uint64_t Address) -> uint64_t {
-    if (Address < FirstAllocAddress)
-      return 0;
-    return Address - FirstAllocAddress;
-  };
-
-  OutFile << FallthroughLBRs.size() << "\n";
-  for (const auto &[Trace, Info] : FallthroughLBRs) {
-    OutFile << formatv("{0:x-}-{1:x-}:{2}\n", filterAddress(Trace.From),
-                       filterAddress(Trace.To),
-                       Info.InternCount + Info.ExternCount);
-  }
-
-  OutFile << BasicSamples.size() << "\n";
-  for (const auto [PC, HitCount] : BasicSamples)
-    OutFile << formatv("{0:x-}:{1}\n", filterAddress(PC), HitCount);
-
-  OutFile << BranchLBRs.size() << "\n";
-  for (const auto &[Trace, Info] : BranchLBRs) {
-    OutFile << formatv("{0:x-}->{1:x-}:{2}\n", filterAddress(Trace.From),
-                       filterAddress(Trace.To), Info.TakenCount);
-  }
-
-  outs() << "PERF2BOLT: wrote " << FallthroughLBRs.size() << " unique traces, "
-         << BasicSamples.size() << " sample addresses and " << BranchLBRs.size()
-         << " unique branches to " << OutputFilename << "\n";
-
-  return std::error_code();
-}
-
 void DataAggregator::filterBinaryMMapInfo() {
   if (opts::FilterPID) {
     auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID);
@@ -583,15 +511,6 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
       (opts::BasicAggregation && parseBasicEvents()))
     errs() << "PERF2BOLT: failed to parse samples\n";
 
-  // We can finish early if the goal is just to generate data for autofdo
-  if (opts::WriteAutoFDOData) {
-    if (std::error_code EC = writeAutoFDOData(opts::OutputFilename))
-      errs() << "Error writing autofdo data to file: " << EC.message() << "\n";
-
-    deleteTempFiles();
-    exit(0);
-  }
-
   // Special handling for memory events
   if (prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback))
     return Error::success();
@@ -1158,14 +1077,6 @@ ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() {
     return make_error_code(errc::no_such_process);
   }
 
-  while (checkAndConsumeFS()) {
-  }
-
-  ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
-  if (std::error_code EC = PCRes.getError())
-    return EC;
-  Res.PC = PCRes.get();
-
   if (checkAndConsumeNewLine())
     return Res;
 
@@ -1472,9 +1383,9 @@ std::error_code DataAggregator::printLBRHeatMap() {
 uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
                                         bool NeedsSkylakeFix) {
   uint64_t NumTraces{0};
-  // LBRs are stored in reverse execution order. NextPC refers to the next
-  // recorded executed PC.
-  uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0;
+  // LBRs are stored in reverse execution order. NextLBR refers to the next
+  // executed branch record.
+  const LBREntry *NextLBR = nullptr;
   uint32_t NumEntry = 0;
   for (const LBREntry &LBR : Sample.LBR) {
     ++NumEntry;
@@ -1486,10 +1397,10 @@ uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
     // chronological order)
     if (NeedsSkylakeFix && NumEntry <= 2)
       continue;
-    if (NextPC) {
+    if (NextLBR) {
       // Record fall-through trace.
       const uint64_t TraceFrom = LBR.To;
-      const uint64_t TraceTo = NextPC;
+      const uint64_t TraceTo = NextLBR->From;
       const BinaryFunction *TraceBF =
           getBinaryFunctionContainingAddress(TraceFrom);
       if (TraceBF && TraceBF->containsAddress(TraceTo)) {
@@ -1524,7 +1435,7 @@ uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
       }
       ++NumTraces;
     }
-    NextPC = LBR.From;
+    NextLBR = &LBR;
 
     uint64_t From = getBinaryFunctionContainingAddress(LBR.From) ? LBR.From : 0;
     uint64_t To = getBinaryFunctionContainingAddress(LBR.To) ? LBR.To : 0;
@@ -1561,8 +1472,6 @@ std::error_code DataAggregator::parseBranchEvents() {
     ++NumSamples;
 
     PerfBranchSample &Sample = SampleRes.get();
-    if (opts::WriteAutoFDOData)
-      ++BasicSamples[Sample.PC];
 
     if (Sample.LBR.empty()) {
       ++NumSamplesNoLBR;
diff --git a/clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.cpp b/clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.cpp
index 30bc8be1719d5..aeb7fe90f2175 100644
--- a/clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.cpp
+++ b/clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.cpp
@@ -118,6 +118,29 @@ findMembersUsedInInitExpr(const CXXCtorInitializer *Initializer,
   return Results;
 }
 
+/// Returns the start of the leading comments before `Loc` (or `Loc` if none).
+static SourceLocation getStartOfLeadingComment(SourceLocation Loc,
+                                               const SourceManager &SM,
+                                               const LangOptions &LangOpts) {
+  // A preceding comment token belongs to the leading comment if it starts on
+  // the same line as, or at the same column as, the location passed in.
+  const unsigned Line = SM.getPresumedLineNumber(Loc);
+  const unsigned Column = SM.getPresumedColumnNumber(Loc);
+  std::optional<Token> Tok =
+      Lexer::findPreviousToken(Loc, SM, LangOpts, /*IncludeComments=*/true);
+  while (Tok && Tok->is(tok::comment)) {
+    const SourceLocation CommentLoc =
+        Lexer::GetBeginningOfToken(Tok->getLocation(), SM, LangOpts);
+    if (SM.getPresumedLineNumber(CommentLoc) != Line &&
+        SM.getPresumedColumnNumber(CommentLoc) != Column) {
+      break;
+    }
+    Loc = CommentLoc;
+    Tok = Lexer::findPreviousToken(Loc, SM, LangOpts, /*IncludeComments=*/true);
+  }
+  return Loc;
+}
+
 /// Returns the end of the trailing comments after `Loc`.
 static SourceLocation getEndOfTrailingComment(SourceLocation Loc,
                                               const SourceManager &SM,
@@ -159,6 +182,7 @@ static SourceRange getFullFieldSourceRange(const FieldDecl &Field,
     if (CurrentToken->is(tok::semi))
       break;
   }
+  Begin = getStartOfLeadingComment(Begin, SM, LangOpts);
   End = getEndOfTrailingComment(End, SM, LangOpts);
   return SourceRange(Begin, End);
 }
diff --git a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
index 50da196315d3b..c14d341caf779 100644
--- a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
+++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
@@ -17,25 +17,16 @@ namespace clang::tidy::utils::lexer {
 std::pair<Token, SourceLocation>
 getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM,
                          const LangOptions &LangOpts, bool SkipComments) {
-  Token Token;
-  Token.setKind(tok::unknown);
+  const std::optional<Token> Tok =
+      Lexer::findPreviousToken(Location, SM, LangOpts, !SkipComments);
 
-  Location = Location.getLocWithOffset(-1);
-  if (Location.isInvalid())
-    return {Token, Location};
-
-  const auto StartOfFile = SM.getLocForStartOfFile(SM.getFileID(Location));
-  while (Location != StartOfFile) {
-    Location = Lexer::GetBeginningOfToken(Location, SM, LangOpts);
-    if (!Lexer::getRawToken(Location, Token, SM, LangOpts) &&
-        (!SkipComments || !Token.is(tok::comment))) {
-      break;
-    }
-    if (Location == StartOfFile)
-      return {Token, Location};
-    Location = Location.getLocWithOffset(-1);
+  if (Tok.has_value()) {
+    return {*Tok, Lexer::GetBeginningOfToken(Tok->getLocation(), SM, LangOpts)};
   }
-  return {Token, Location};
+
+  Token Token;
+  Token.setKind(tok::unknown);
+  return {Token, SourceLocation()};
 }
 
 Token getPreviousToken(SourceLocation Location, const SourceManager &SM,
diff --git a/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp b/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp
index 71e97ac4efd67..2b1da4be5c1bd 100644
--- a/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp
+++ b/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp
@@ -830,6 +830,23 @@ bool OverlayCDB::setCompileCommand(PathRef File,
   return true;
 }
 
+/// Returns the project modules provided by the delegate CDB for `File`, with
+/// this overlay's command mangler installed on them.
+std::unique_ptr<ProjectModules>
+OverlayCDB::getProjectModules(PathRef File) const {
+  auto MDB = DelegatingCDB::getProjectModules(File);
+  // The delegate may not provide module information; nothing to configure.
+  if (!MDB)
+    return MDB;
+  // Mangler is captured by reference, so the returned object must not outlive
+  // this OverlayCDB. NOTE(review): assumes Mangler is non-empty -- confirm.
+  MDB->setCommandMangler([&Mangler = Mangler](tooling::CompileCommand &Command,
+                                              PathRef CommandPath) {
+    Mangler(Command, CommandPath);
+  });
+  return MDB;
+}
+
 DelegatingCDB::DelegatingCDB(const GlobalCompilationDatabase *Base)
     : Base(Base) {
   if (Base)
diff --git a/clang-tools-extra/clangd/GlobalCompilationDatabase.h b/clang-tools-extra/clangd/GlobalCompilationDatabase.h
index f8349c6efecb0..1d636d73664be 100644
--- a/clang-tools-extra/clangd/GlobalCompilationDatabase.h
+++ b/clang-tools-extra/clangd/GlobalCompilationDatabase.h
@@ -209,6 +209,9 @@ class OverlayCDB : public DelegatingCDB {
   setCompileCommand(PathRef File,
                     std::optional<tooling::CompileCommand> CompilationCommand);
 
+  std::unique_ptr<ProjectModules>
+  getProjectModules(PathRef File) const override;
+
 private:
   mutable std::mutex Mutex;
   llvm::StringMap<tooling::CompileCommand> Commands; /* GUARDED_BY(Mut) */
diff --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp
index 5e136d0e76ece..3ab3d89030520 100644
--- a/clang-tools-extra/clangd/Hover.cpp
+++ b/clang-tools-extra/clangd/Hover.cpp
@@ -1193,12 +1193,13 @@ void maybeAddSymbolProviders(ParsedAST &AST, HoverInfo &HI,
                              include_cleaner::Symbol Sym) {
   trace::Span Tracer("Hover::maybeAddSymbolProviders");
 
-  const SourceManager &SM = AST.getSourceManager();
   llvm::SmallVector<include_cleaner::Header> RankedProviders =
-      include_cleaner::headersForSymbol(Sym, SM, &AST.getPragmaIncludes());
+      include_cleaner::headersForSymbol(Sym, AST.getPreprocessor(),
+                                        &AST.getPragmaIncludes());
   if (RankedProviders.empty())
     return;
 
+  const SourceManager &SM = AST.getSourceManager();
   std::string Result;
   include_cleaner::Includes ConvertedIncludes = convertIncludes(AST);
   for (const auto &P : RankedProviders) {
diff --git a/clang-tools-extra/clangd/ProjectModules.h b/clang-tools-extra/clangd/ProjectModules.h
index 3b9b564a87da0..48d52ac9deb89 100644
--- a/clang-tools-extra/clangd/ProjectModules.h
+++ b/clang-tools-extra/clangd/ProjectModules.h
@@ -9,8 +9,10 @@
 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_PROJECTMODULES_H
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_PROJECTMODULES_H
 
+#include "support/Function.h"
 #include "support/Path.h"
 #include "support/ThreadsafeFS.h"
+#include "clang/Tooling/CompilationDatabase.h"
 
 #include <memory>
 
@@ -36,11 +38,16 @@ namespace clangd {
 /// `<primary-module-name>[:partition-name]`. So module names covers partitions.
 class ProjectModules {
 public:
+  using CommandMangler =
+      llvm::unique_function<void(tooling::CompileCommand &, PathRef) const>;
+
   virtual std::vector<std::string> getRequiredModules(PathRef File) = 0;
   virtual PathRef
   getSourceForModuleName(llvm::StringRef ModuleName,
                          PathRef RequiredSrcFile = PathRef()) = 0;
 
+  virtual void setCommandMangler(CommandMangler Mangler) {}
+
   virtual ~ProjectModules() = default;
 };
 
diff --git a/clang-tools-extra/clangd/ScanningProjectModules.cpp b/clang-tools-extra/clangd/ScanningProjectModules.cpp
index 92f75ef7d5c25..e4dc11c1c2895 100644
--- a/clang-tools-extra/clangd/ScanningProjectModules.cpp
+++ b/clang-tools-extra/clangd/ScanningProjectModules.cpp
@@ -48,7 +48,8 @@ class ModuleDependencyScanner {
   };
 
   /// Scanning the single file specified by \param FilePath.
-  std::optional<ModuleDependencyInfo> scan(PathRef FilePath);
+  std::optional<ModuleDependencyInfo>
+  scan(PathRef FilePath, const ProjectModules::CommandMangler &Mangler);
 
   /// Scanning every source file in the current project to get the
   /// <module-name> to <module-unit-source> map.
@@ -57,7 +58,7 @@ class ModuleDependencyScanner {
   /// a global module dependency scanner to monitor every file. Or we
   /// can simply require the build systems (or even the end users)
   /// to provide the map.
-  void globalScan();
+  void globalScan(const ProjectModules::CommandMangler &Mangler);
 
   /// Get the source file from the module name. Note that the language
   /// guarantees all the module names are unique in a valid program.
@@ -69,7 +70,9 @@ class ModuleDependencyScanner {
 
   /// Return the direct required modules. Indirect required modules are not
   /// included.
-  std::vector<std::string> getRequiredModules(PathRef File);
+  std::vector<std::string>
+  getRequiredModules(PathRef File,
+                     const ProjectModules::CommandMangler &Mangler);
 
 private:
   std::shared_ptr<const clang::tooling::CompilationDatabase> CDB;
@@ -87,7 +90,8 @@ class ModuleDependencyScanner {
 };
 
 std::optional<ModuleDependencyScanner::ModuleDependencyInfo>
-ModuleDependencyScanner::scan(PathRef FilePath) {
+ModuleDependencyScanner::scan(PathRef FilePath,
+                              const ProjectModules::CommandMangler &Mangler) {
   auto Candidates = CDB->getCompileCommands(FilePath);
   if (Candidates.empty())
     return std::nullopt;
@@ -97,10 +101,8 @@ ModuleDependencyScanner::scan(PathRef FilePath) {
   // DirectoryBasedGlobalCompilationDatabase::getCompileCommand.
   tooling::CompileCommand Cmd = std::move(Candidates.front());
 
-  static int StaticForMainAddr; // Just an address in this process.
-  Cmd.CommandLine.push_back("-resource-dir=" +
-                            CompilerInvocation::GetResourcesPath(
-                                "clangd", (void *)&StaticForMainAddr));
+  if (Mangler)
+    Mangler(Cmd, FilePath);
 
   using namespace clang::tooling::dependencies;
 
@@ -130,9 +132,10 @@ ModuleDependencyScanner::scan(PathRef FilePath) {
   return Result;
 }
 
-void ModuleDependencyScanner::globalScan() {
+void ModuleDependencyScanner::globalScan(
+    const ProjectModules::CommandMangler &Mangler) {
   for (auto &File : CDB->getAllFiles())
-    scan(File);
+    scan(File, Mangler);
 
   GlobalScanned = true;
 }
@@ -150,9 +153,9 @@ PathRef ModuleDependencyScanner::getSourceForModuleName(
   return {};
 }
 
-std::vector<std::string>
-ModuleDependencyScanner::getRequiredModules(PathRef File) {
-  auto ScanningResult = scan(File);
+std::vector<std::string> ModuleDependencyScanner::getRequiredModules(
+    PathRef File, const ProjectModules::CommandMangler &Mangler) {
+  auto ScanningResult = scan(File, Mangler);
   if (!ScanningResult)
     return {};
 
@@ -177,7 +180,11 @@ class ScanningAllProjectModules : public ProjectModules {
   ~ScanningAllProjectModules() override = default;
 
   std::vector<std::string> getRequiredModules(PathRef File) override {
-    return Scanner.getRequiredModules(File);
+    return Scanner.getRequiredModules(File, Mangler);
+  }
+
+  void setCommandMangler(CommandMangler Mangler) override {
+    this->Mangler = std::move(Mangler);
   }
 
   /// RequiredSourceFile is not used intentionally. See the comments of
@@ -185,12 +192,13 @@ class ScanningAllProjectModules : public ProjectModules {
   PathRef
   getSourceForModuleName(llvm::StringRef ModuleName,
                          PathRef RequiredSourceFile = PathRef()) override {
-    Scanner.globalScan();
+    Scanner.globalScan(Mangler);
     return Scanner.getSourceForModuleName(ModuleName);
   }
 
 private:
   ModuleDependencyScanner Scanner;
+  CommandMangler Mangler;
 };
 
 std::unique_ptr<ProjectModules> scanningProjectModules(
diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp
index 6d0af20e31260..1de7faf81746e 100644
--- a/clang-tools-extra/clangd/index/SymbolCollector.cpp
+++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp
@@ -888,7 +888,7 @@ void SymbolCollector::setIncludeLocation(const Symbol &S, SourceLocation DefLoc,
   // might run while parsing, rather than at the end of a translation unit.
   // Hence we see more and more redecls over time.
   SymbolProviders[S.ID] =
-      include_cleaner::headersForSymbol(Sym, SM, Opts.PragmaIncludes);
+      include_cleaner::headersForSymbol(Sym, *PP, Opts.PragmaIncludes);
 }
 
 llvm::StringRef getStdHeader(const Symbol *S, const LangOptions &LangOpts) {
diff --git a/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp b/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp
index 1bb8e19cce23e..51723c797eabc 100644
--- a/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp
+++ b/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp
@@ -11,13 +11,14 @@
 /// code mode.
 #ifndef _WIN32
 
-#include "ModulesBuilder.h"
-#include "ScanningProjectModules.h"
 #include "Annotations.h"
 #include "CodeComplete.h"
 #include "Compiler.h"
+#include "ModulesBuilder.h"
+#include "ScanningProjectModules.h"
 #include "TestTU.h"
 #include "support/ThreadsafeFS.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/raw_ostream.h"
 #include "gmock/gmock.h"
@@ -191,6 +192,41 @@ export module M;
   EXPECT_TRUE(MInfo->canReuse(*Invocation, FS.view(TestDir)));
 }
 
+TEST_F(PrerequisiteModulesTests, ModuleWithArgumentPatch) {
+  MockDirectoryCompilationDatabase CDB(TestDir, FS);
+
+  CDB.ExtraClangFlags.push_back("-invalid-unknown-flag");
+
+  CDB.addFile("Dep.cppm", R"cpp(
+export module Dep;
+  )cpp");
+
+  CDB.addFile("M.cppm", R"cpp(
+export module M;
+import Dep;
+  )cpp");
+
+  // An invalid flag breaks the module compilation, so getRequiredModules
+  // returns an empty list.
+  auto ProjectModules = CDB.getProjectModules(getFullPath("M.cppm"));
+  EXPECT_TRUE(
+      ProjectModules->getRequiredModules(getFullPath("M.cppm")).empty());
+
+  // Set the mangler to filter out the invalid flag
+  ProjectModules->setCommandMangler(
+      [](tooling::CompileCommand &Command, PathRef) {
+        auto &Args = Command.CommandLine;
+        auto It = std::find(Args.begin(), Args.end(), "-invalid-unknown-flag");
+        if (It != Args.end()) // erase(end()) is undefined behavior.
+          Args.erase(It);
+      });
+
+  // And now it returns a non-empty list of required modules since the
+  // compilation succeeded
+  EXPECT_FALSE(
+      ProjectModules->getRequiredModules(getFullPath("M.cppm")).empty());
+}
+
 TEST_F(PrerequisiteModulesTests, ModuleWithDepTest) {
   MockDirectoryCompilationDatabase CDB(TestDir, FS);
 
@@ -435,7 +471,7 @@ void func() {
                     /*Callback=*/nullptr);
   EXPECT_TRUE(Preamble);
   EXPECT_TRUE(Preamble->RequiredModules);
-  
+
   auto Result = codeComplete(getFullPath("Use.cpp"), Test.point(),
                              Preamble.get(), Use, {});
   EXPECT_FALSE(Result.Completions.empty());
@@ -474,7 +510,7 @@ void func() {
                     /*Callback=*/nullptr);
   EXPECT_TRUE(Preamble);
   EXPECT_TRUE(Preamble->RequiredModules);
-  
+
   auto Result = signatureHelp(getFullPath("Use.cpp"), Test.point(),
                               *Preamble.get(), Use, MarkupKind::PlainText);
   EXPECT_FALSE(Result.signatures.empty());
diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h
index 46ca3c9d08074..c3241763237d1 100644
--- a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h
+++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h
@@ -90,7 +90,7 @@ std::string fixIncludes(const AnalysisResults &Results,
 /// Returned headers are sorted by relevance, first element is the most
 /// likely provider for the symbol.
 llvm::SmallVector<Header> headersForSymbol(const Symbol &S,
-                                           const SourceManager &SM,
+                                           const Preprocessor &PP,
                                            const PragmaIncludes *PI);
 } // namespace include_cleaner
 } // namespace clang
diff --git a/clang-tools-extra/include-cleaner/lib/Analysis.cpp b/clang-tools-extra/include-cleaner/lib/Analysis.cpp
index e3a4834cb19ae..a1781f4e24f2e 100644
--- a/clang-tools-extra/include-cleaner/lib/Analysis.cpp
+++ b/clang-tools-extra/include-cleaner/lib/Analysis.cpp
@@ -64,7 +64,7 @@ void walkUsed(llvm::ArrayRef<Decl *> ASTRoots,
       // FIXME: Most of the work done here is repetitive. It might be useful to
       // have a cache/batching.
       SymbolReference SymRef{ND, Loc, RT};
-      return CB(SymRef, headersForSymbol(ND, SM, PI));
+      return CB(SymRef, headersForSymbol(ND, PP, PI));
     });
   }
   for (const SymbolReference &MacroRef : MacroRefs) {
@@ -72,7 +72,7 @@ void walkUsed(llvm::ArrayRef<Decl *> ASTRoots,
     if (!SM.isWrittenInMainFile(SM.getSpellingLoc(MacroRef.RefLocation)) ||
         shouldIgnoreMacroReference(PP, MacroRef.Target.macro()))
       continue;
-    CB(MacroRef, headersForSymbol(MacroRef.Target, SM, PI));
+    CB(MacroRef, headersForSymbol(MacroRef.Target, PP, PI));
   }
 }
 
diff --git a/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h b/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h
index cd796c2da7b80..7d170fd15014d 100644
--- a/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h
+++ b/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h
@@ -25,6 +25,8 @@
 #include "clang-include-cleaner/Analysis.h"
 #include "clang-include-cleaner/Record.h"
 #include "clang-include-cleaner/Types.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Lex/Preprocessor.h"
 #include "llvm/ADT/STLFunctionalExtras.h"
 #include <vector>
 
@@ -57,13 +59,14 @@ llvm::SmallVector<Hinted<Header>> findHeaders(const SymbolLocation &Loc,
                                               const PragmaIncludes *PI);
 
 /// A set of locations that provides the declaration.
-std::vector<Hinted<SymbolLocation>> locateSymbol(const Symbol &S);
+std::vector<Hinted<SymbolLocation>> locateSymbol(const Symbol &S,
+                                                 const LangOptions &LO);
 
 /// Write an HTML summary of the analysis to the given stream.
 void writeHTMLReport(FileID File, const Includes &,
                      llvm::ArrayRef<Decl *> Roots,
                      llvm::ArrayRef<SymbolReference> MacroRefs, ASTContext &Ctx,
-                     const HeaderSearch &HS, PragmaIncludes *PI,
+                     const Preprocessor &PP, PragmaIncludes *PI,
                      llvm::raw_ostream &OS);
 
 } // namespace include_cleaner
diff --git a/clang-tools-extra/include-cleaner/lib/FindHeaders.cpp b/clang-tools-extra/include-cleaner/lib/FindHeaders.cpp
index 7b28d1c252d71..b96d9a70728c2 100644
--- a/clang-tools-extra/include-cleaner/lib/FindHeaders.cpp
+++ b/clang-tools-extra/include-cleaner/lib/FindHeaders.cpp
@@ -18,6 +18,7 @@
 #include "clang/Basic/FileEntry.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
+#include "clang/Lex/Preprocessor.h"
 #include "clang/Tooling/Inclusions/StandardLibrary.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
@@ -239,8 +240,9 @@ llvm::SmallVector<Hinted<Header>> findHeaders(const SymbolLocation &Loc,
 }
 
 llvm::SmallVector<Header> headersForSymbol(const Symbol &S,
-                                           const SourceManager &SM,
+                                           const Preprocessor &PP,
                                            const PragmaIncludes *PI) {
+  const auto &SM = PP.getSourceManager();
   // Get headers for all the locations providing Symbol. Same header can be
   // reached through different traversals, deduplicate those into a single
   // Header by merging their hints.
@@ -248,7 +250,7 @@ llvm::SmallVector<Header> headersForSymbol(const Symbol &S,
   if (auto SpecialHeaders = headersForSpecialSymbol(S, SM, PI)) {
     Headers = std::move(*SpecialHeaders);
   } else {
-    for (auto &Loc : locateSymbol(S))
+    for (auto &Loc : locateSymbol(S, PP.getLangOpts()))
       Headers.append(applyHints(findHeaders(Loc, SM, PI), Loc.Hint));
   }
   // If two Headers probably refer to the same file (e.g. Verbatim(foo.h) and
diff --git a/clang-tools-extra/include-cleaner/lib/HTMLReport.cpp b/clang-tools-extra/include-cleaner/lib/HTMLReport.cpp
index bbe8bc230c6e2..92c7c554ca50c 100644
--- a/clang-tools-extra/include-cleaner/lib/HTMLReport.cpp
+++ b/clang-tools-extra/include-cleaner/lib/HTMLReport.cpp
@@ -21,6 +21,7 @@
 #include "clang/Basic/SourceManager.h"
 #include "clang/Lex/HeaderSearch.h"
 #include "clang/Lex/Lexer.h"
+#include "clang/Lex/Preprocessor.h"
 #include "clang/Tooling/Inclusions/StandardLibrary.h"
 #include "llvm/Support/ScopedPrinter.h"
 #include "llvm/Support/raw_ostream.h"
@@ -135,7 +136,7 @@ class Reporter {
   llvm::raw_ostream &OS;
   const ASTContext &Ctx;
   const SourceManager &SM;
-  const HeaderSearch &HS;
+  const Preprocessor &PP;
   const include_cleaner::Includes &Includes;
   const PragmaIncludes *PI;
   FileID MainFile;
@@ -170,9 +171,9 @@ class Reporter {
 
   void fillTarget(Ref &R) {
     // Duplicates logic from walkUsed(), which doesn't expose SymbolLocations.
-    for (auto &Loc : locateSymbol(R.Sym))
+    for (auto &Loc : locateSymbol(R.Sym, Ctx.getLangOpts()))
       R.Locations.push_back(Loc);
-    R.Headers = headersForSymbol(R.Sym, SM, PI);
+    R.Headers = headersForSymbol(R.Sym, PP, PI);
 
     for (const auto &H : R.Headers) {
       R.Includes.append(Includes.match(H));
@@ -189,14 +190,15 @@ class Reporter {
                      R.Includes.end());
 
     if (!R.Headers.empty())
-      R.Insert = spellHeader({R.Headers.front(), HS, MainFE});
+      R.Insert =
+          spellHeader({R.Headers.front(), PP.getHeaderSearchInfo(), MainFE});
   }
 
 public:
-  Reporter(llvm::raw_ostream &OS, ASTContext &Ctx, const HeaderSearch &HS,
+  Reporter(llvm::raw_ostream &OS, ASTContext &Ctx, const Preprocessor &PP,
            const include_cleaner::Includes &Includes, const PragmaIncludes *PI,
            FileID MainFile)
-      : OS(OS), Ctx(Ctx), SM(Ctx.getSourceManager()), HS(HS),
+      : OS(OS), Ctx(Ctx), SM(Ctx.getSourceManager()), PP(PP),
         Includes(Includes), PI(PI), MainFile(MainFile),
         MainFE(SM.getFileEntryForID(MainFile)) {}
 
@@ -498,9 +500,9 @@ class Reporter {
 void writeHTMLReport(FileID File, const include_cleaner::Includes &Includes,
                      llvm::ArrayRef<Decl *> Roots,
                      llvm::ArrayRef<SymbolReference> MacroRefs, ASTContext &Ctx,
-                     const HeaderSearch &HS, PragmaIncludes *PI,
+                     const Preprocessor &PP, PragmaIncludes *PI,
                      llvm::raw_ostream &OS) {
-  Reporter R(OS, Ctx, HS, Includes, PI, File);
+  Reporter R(OS, Ctx, PP, Includes, PI, File);
   const auto& SM = Ctx.getSourceManager();
   for (Decl *Root : Roots)
     walkAST(*Root, [&](SourceLocation Loc, const NamedDecl &D, RefType T) {
diff --git a/clang-tools-extra/include-cleaner/lib/LocateSymbol.cpp b/clang-tools-extra/include-cleaner/lib/LocateSymbol.cpp
index 78e783a62eb27..b7433305152f9 100644
--- a/clang-tools-extra/include-cleaner/lib/LocateSymbol.cpp
+++ b/clang-tools-extra/include-cleaner/lib/LocateSymbol.cpp
@@ -54,20 +54,24 @@ std::vector<Hinted<SymbolLocation>> locateDecl(const Decl &D) {
   return Result;
 }
 
-std::vector<Hinted<SymbolLocation>> locateMacro(const Macro &M) {
+std::vector<Hinted<SymbolLocation>> locateMacro(const Macro &M,
+                                                const tooling::stdlib::Lang L) {
   // FIXME: Should we also provide physical locations?
-  if (auto SS = tooling::stdlib::Symbol::named("", M.Name->getName()))
+  if (auto SS = tooling::stdlib::Symbol::named("", M.Name->getName(), L))
     return {{*SS, Hints::CompleteSymbol}};
   return {{M.Definition, Hints::CompleteSymbol}};
 }
 } // namespace
 
-std::vector<Hinted<SymbolLocation>> locateSymbol(const Symbol &S) {
+std::vector<Hinted<SymbolLocation>> locateSymbol(const Symbol &S,
+                                                 const LangOptions &LO) {
+  const auto L = !LO.CPlusPlus && LO.C99 ? tooling::stdlib::Lang::C
+                                         : tooling::stdlib::Lang::CXX;
   switch (S.kind()) {
   case Symbol::Declaration:
     return locateDecl(S.declaration());
   case Symbol::Macro:
-    return locateMacro(S.macro());
+    return locateMacro(S.macro(), L);
   }
   llvm_unreachable("Unknown Symbol::Kind enum");
 }
diff --git a/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp b/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp
index f85dbc0e0c31f..1d9458ffc4d32 100644
--- a/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp
+++ b/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp
@@ -216,10 +216,9 @@ class Action : public clang::ASTFrontendAction {
       ++Errors;
       return;
     }
-    writeHTMLReport(
-        AST.Ctx->getSourceManager().getMainFileID(), PP.Includes, AST.Roots,
-        PP.MacroReferences, *AST.Ctx,
-        getCompilerInstance().getPreprocessor().getHeaderSearchInfo(), &PI, OS);
+    writeHTMLReport(AST.Ctx->getSourceManager().getMainFileID(), PP.Includes,
+                    AST.Roots, PP.MacroReferences, *AST.Ctx,
+                    getCompilerInstance().getPreprocessor(), &PI, OS);
   }
 };
 class ActionFactory : public tooling::FrontendActionFactory {
diff --git a/clang-tools-extra/include-cleaner/unittests/FindHeadersTest.cpp b/clang-tools-extra/include-cleaner/unittests/FindHeadersTest.cpp
index 84e02e1d0d621..0ac243937e6e4 100644
--- a/clang-tools-extra/include-cleaner/unittests/FindHeadersTest.cpp
+++ b/clang-tools-extra/include-cleaner/unittests/FindHeadersTest.cpp
@@ -306,7 +306,7 @@ class HeadersForSymbolTest : public FindHeadersTest {
     if (!V.Out)
       ADD_FAILURE() << "Couldn't find any decls named " << Name << ".";
     assert(V.Out);
-    return headersForSymbol(*V.Out, AST->sourceManager(), &PI);
+    return headersForSymbol(*V.Out, AST->preprocessor(), &PI);
   }
   llvm::SmallVector<Header> headersForFoo() { return headersFor("foo"); }
 };
@@ -611,13 +611,12 @@ TEST_F(HeadersForSymbolTest, AmbiguousStdSymbolsUsingShadow) {
   Visitor V;
   V.TraverseDecl(AST->context().getTranslationUnitDecl());
   ASSERT_TRUE(V.Out) << "Couldn't find a DeclRefExpr!";
-  EXPECT_THAT(headersForSymbol(*(V.Out->getFoundDecl()),
-                               AST->sourceManager(), &PI),
-              UnorderedElementsAre(
-                  Header(*tooling::stdlib::Header::named("<cstdio>"))));
+  EXPECT_THAT(
+      headersForSymbol(*(V.Out->getFoundDecl()), AST->preprocessor(), &PI),
+      UnorderedElementsAre(
+          Header(*tooling::stdlib::Header::named("<cstdio>"))));
 }
 
-
 TEST_F(HeadersForSymbolTest, StandardHeaders) {
   Inputs.Code = R"cpp(
     #include "stdlib_internal.h"
@@ -636,6 +635,30 @@ TEST_F(HeadersForSymbolTest, StandardHeaders) {
                            tooling::stdlib::Header::named("<assert.h>")));
 }
 
+TEST_F(HeadersForSymbolTest, StdlibLangForMacros) {
+  Inputs.Code = R"cpp(
+    #define EOF 0
+    void foo() { EOF; }
+  )cpp";
+  {
+    buildAST();
+    const Macro Eof{AST->preprocessor().getIdentifierInfo("EOF"), {}};
+    EXPECT_THAT(
+        headersForSymbol(Eof, AST->preprocessor(), nullptr),
+        UnorderedElementsAre(tooling::stdlib::Header::named("<cstdio>"),
+                             tooling::stdlib::Header::named("<stdio.h>")));
+  }
+
+  {
+    Inputs.ExtraArgs.push_back("-xc");
+    buildAST();
+    const Macro Eof{AST->preprocessor().getIdentifierInfo("EOF"), {}};
+    EXPECT_THAT(headersForSymbol(Eof, AST->preprocessor(), nullptr),
+                UnorderedElementsAre(tooling::stdlib::Header::named(
+                    "<stdio.h>", tooling::stdlib::Lang::C)));
+  }
+}
+
 TEST_F(HeadersForSymbolTest, ExporterNoNameMatch) {
   Inputs.Code = R"cpp(
     #include "exporter/foo.h"
diff --git a/clang-tools-extra/include-cleaner/unittests/LocateSymbolTest.cpp b/clang-tools-extra/include-cleaner/unittests/LocateSymbolTest.cpp
index 756757cfd0f09..1e7baf142a75a 100644
--- a/clang-tools-extra/include-cleaner/unittests/LocateSymbolTest.cpp
+++ b/clang-tools-extra/include-cleaner/unittests/LocateSymbolTest.cpp
@@ -11,6 +11,7 @@
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclBase.h"
 #include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/Basic/LangOptions.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Testing/TestAST.h"
@@ -96,6 +97,8 @@ struct LocateExample {
       Results.emplace_back(SM.getComposedLoc(FID, Offset));
     return Results;
   }
+
+  const LangOptions &langOpts() { return AST.preprocessor().getLangOpts(); }
 };
 
 TEST(LocateSymbol, Decl) {
@@ -110,7 +113,7 @@ TEST(LocateSymbol, Decl) {
   for (auto &Case : Cases) {
     SCOPED_TRACE(Case);
     LocateExample Test(Case);
-    EXPECT_THAT(locateSymbol(Test.findDecl("foo")),
+    EXPECT_THAT(locateSymbol(Test.findDecl("foo"), Test.langOpts()),
                 ElementsAreArray(Test.points()));
   }
 }
@@ -119,12 +122,12 @@ TEST(LocateSymbol, Stdlib) {
   {
     LocateExample Test("namespace std { struct vector; }");
     EXPECT_THAT(
-        locateSymbol(Test.findDecl("vector")),
+        locateSymbol(Test.findDecl("vector"), Test.langOpts()),
         ElementsAre(*tooling::stdlib::Symbol::named("std::", "vector")));
   }
   {
     LocateExample Test("#define assert(x)\nvoid foo() { assert(true); }");
-    EXPECT_THAT(locateSymbol(Test.findMacro("assert")),
+    EXPECT_THAT(locateSymbol(Test.findMacro("assert"), Test.langOpts()),
                 ElementsAre(*tooling::stdlib::Symbol::named("", "assert")));
   }
 }
@@ -132,7 +135,7 @@ TEST(LocateSymbol, Stdlib) {
 TEST(LocateSymbol, Macros) {
   // Make sure we preserve the last one.
   LocateExample Test("#define FOO\n#undef FOO\n#define ^FOO");
-  EXPECT_THAT(locateSymbol(Test.findMacro("FOO")),
+  EXPECT_THAT(locateSymbol(Test.findMacro("FOO"), Test.langOpts()),
               ElementsAreArray(Test.points()));
 }
 
@@ -143,7 +146,7 @@ TEST(LocateSymbol, CompleteSymbolHint) {
   {
     // stdlib symbols are always complete.
     LocateExample Test("namespace std { struct vector; }");
-    EXPECT_THAT(locateSymbol(Test.findDecl("vector")),
+    EXPECT_THAT(locateSymbol(Test.findDecl("vector"), Test.langOpts()),
                 ElementsAre(HintedSymbol(
                     *tooling::stdlib::Symbol::named("std::", "vector"),
                     Hints::CompleteSymbol)));
@@ -151,7 +154,7 @@ TEST(LocateSymbol, CompleteSymbolHint) {
   {
     // macros are always complete.
     LocateExample Test("#define ^FOO");
-    EXPECT_THAT(locateSymbol(Test.findMacro("FOO")),
+    EXPECT_THAT(locateSymbol(Test.findMacro("FOO"), Test.langOpts()),
                 ElementsAre(HintedSymbol(Test.points().front(),
                                          Hints::CompleteSymbol)));
   }
@@ -165,7 +168,7 @@ TEST(LocateSymbol, CompleteSymbolHint) {
     for (auto &Case : Cases) {
       SCOPED_TRACE(Case);
       LocateExample Test(Case);
-      EXPECT_THAT(locateSymbol(Test.findDecl("foo")),
+      EXPECT_THAT(locateSymbol(Test.findDecl("foo"), Test.langOpts()),
                   ElementsAre(HintedSymbol(Test.points().front(), Hints::None),
                               HintedSymbol(Test.points().back(),
                                            Hints::CompleteSymbol)));
@@ -181,7 +184,7 @@ TEST(LocateSymbol, CompleteSymbolHint) {
     for (auto &Case : Cases) {
       SCOPED_TRACE(Case);
       LocateExample Test(Case);
-      EXPECT_THAT(locateSymbol(Test.findDecl("foo")),
+      EXPECT_THAT(locateSymbol(Test.findDecl("foo"), Test.langOpts()),
                   Each(Field(&Hinted<SymbolLocation>::Hint,
                              Eq(Hints::CompleteSymbol))));
     }
diff --git a/clang-tools-extra/test/clang-reorder-fields/Comments.cpp b/clang-tools-extra/test/clang-reorder-fields/Comments.cpp
index a31b6692c9ac7..8a81fbfc09313 100644
--- a/clang-tools-extra/test/clang-reorder-fields/Comments.cpp
+++ b/clang-tools-extra/test/clang-reorder-fields/Comments.cpp
@@ -1,4 +1,4 @@
-// RUN: clang-reorder-fields -record-name Foo -fields-order e1,e3,e2,a,c,b %s -- | FileCheck %s
+// RUN: clang-reorder-fields -record-name Foo -fields-order c,e1,e3,e2,a,b %s -- | FileCheck %s
 
 class Foo {
   int a; // Trailing comment for a.
@@ -12,12 +12,15 @@ class Foo {
   int e3 /*c-like*/;
 };
 
-// CHECK:       /*c-like*/ int e1;
+// Note: the position of the empty line is somewhat arbitrary.
+
+// CHECK:       // Prefix comments for c.
+// CHECK-NEXT:  int c;
+// CHECK-NEXT:  /*c-like*/ int e1;
 // CHECK-NEXT:  int e3 /*c-like*/;
+// CHECK-EMPTY:
 // CHECK-NEXT:  int /*c-like*/ e2;
 // CHECK-NEXT:  int a; // Trailing comment for a.
-// CHECK-NEXT:  // Prefix comments for c.
-// CHECK-NEXT:  int c;
 // CHECK-NEXT:  int b; // Multiline
 // CHECK-NEXT:         // trailing for b.
 
diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst
index 511a967f66d10..30a2325949f48 100644
--- a/clang/docs/ClangFormatStyleOptions.rst
+++ b/clang/docs/ClangFormatStyleOptions.rst
@@ -3946,21 +3946,6 @@ the configuration (without a prefix: ``Auto``).
    This is an experimental flag, that might go away or be renamed. Do
    not use this in config files, etc. Use at your own risk.
 
-.. _ExportBlockIndentation:
-
-**ExportBlockIndentation** (``Boolean``) :versionbadge:`clang-format 20` :ref:`¶ <ExportBlockIndentation>`
-  If ``true``, clang-format will indent the body of an ``export { ... }``
-  block. This doesn't affect the formatting of anything else related to
-  exported declarations.
-
-  .. code-block:: c++
-
-     true:                     false:
-     export {          vs.     export {
-       void foo();             void foo();
-       void bar();             void bar();
-     }                         }
-
 .. _FixNamespaceComments:
 
 **FixNamespaceComments** (``Boolean``) :versionbadge:`clang-format 5` :ref:`¶ <FixNamespaceComments>`
@@ -4228,6 +4213,21 @@ the configuration (without a prefix: ``Auto``).
        plop();                                  plop();
      }                                      }
 
+.. _IndentExportBlock:
+
+**IndentExportBlock** (``Boolean``) :versionbadge:`clang-format 20` :ref:`¶ <IndentExportBlock>`
+  If ``true``, clang-format will indent the body of an ``export { ... }``
+  block. This doesn't affect the formatting of anything else related to
+  exported declarations.
+
+  .. code-block:: c++
+
+     true:                     false:
+     export {          vs.     export {
+       void foo();             void foo();
+       void bar();             void bar();
+     }                         }
+
 .. _IndentExternBlock:
 
 **IndentExternBlock** (``IndentExternBlockStyle``) :versionbadge:`clang-format 11` :ref:`¶ <IndentExternBlock>`
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index bbeeefe82282b..c42b88015e269 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -5656,7 +5656,7 @@ The ``#pragma clang section`` directive obeys the following rules:
 
 * The pragma clang section is enabled automatically, without need of any flags.
 
-* This feature is only defined to work sensibly for ELF and Mach-O targets.
+* This feature is only defined to work sensibly for ELF, Mach-O and COFF targets.
 
 * If section name is specified through _attribute_((section("myname"))), then
   the attribute name gains precedence.
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index f75c726e2751c..cad17c1b3957b 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -318,6 +318,8 @@ C++23 Feature Support
 
 - ``__cpp_explicit_this_parameter`` is now defined. (#GH82780)
 
+- Add support for `P2280R4 Using unknown pointers and references in constant expressions <https://wg21.link/P2280R4>`_. (#GH63139)
+
 C++20 Feature Support
 ^^^^^^^^^^^^^^^^^^^^^
 
@@ -800,6 +802,8 @@ Improvements to Clang's diagnostics
 
 - Fix false positives warning for non-std functions with name `infinity` (#123231).
 
+- Clang now emits a ``-Wignored-qualifiers`` diagnostic when a base class includes cv-qualifiers (#GH55474).
+
 Improvements to Clang's time-trace
 ----------------------------------
 
@@ -966,7 +970,7 @@ Bug Fixes to C++ Support
   constraints are applied. (#GH122134)
 - Fixed canonicalization of pack indexing types - Clang did not always recognized identical pack indexing. (#GH123033)
 - Fixed a nested lambda substitution issue for constraint evaluation. (#GH123441)
-
+- Fixed various false diagnostics related to the use of immediate functions. (#GH123472)
 
 Bug Fixes to AST Handling
 ^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -1122,6 +1126,8 @@ Windows Support
   When `-fms-compatibility-version=18.00` or prior is set on the command line this Microsoft extension is still
   allowed as VS2013 and prior allow it.
 
+- Clang now supports the ``#pragma clang section`` directive for COFF targets.
+
 LoongArch Support
 ^^^^^^^^^^^^^^^^^
 
@@ -1235,7 +1241,7 @@ clang-format
 - Adds ``VariableTemplates`` option.
 - Adds support for bash globstar in ``.clang-format-ignore``.
 - Adds ``WrapNamespaceBodyWithEmptyLines`` option.
-- Adds the ``ExportBlockIndentation`` option.
+- Adds the ``IndentExportBlock`` option.
 
 libclang
 --------
diff --git a/clang/include/clang/AST/APValue.h b/clang/include/clang/AST/APValue.h
index 4401f3a8ff482..833a78c77871d 100644
--- a/clang/include/clang/AST/APValue.h
+++ b/clang/include/clang/AST/APValue.h
@@ -247,6 +247,7 @@ class APValue {
   struct NoLValuePath {};
   struct UninitArray {};
   struct UninitStruct {};
+  struct ConstexprUnknown {};
 
   template <typename Impl> friend class clang::serialization::BasicReaderBase;
   friend class ASTImporter;
@@ -254,6 +255,7 @@ class APValue {
 
 private:
   ValueKind Kind;
+  bool AllowConstexprUnknown : 1;
 
   struct ComplexAPSInt {
     APSInt Real, Imag;
@@ -312,32 +314,39 @@ class APValue {
   DataType Data;
 
 public:
+  bool allowConstexprUnknown() const { return AllowConstexprUnknown; }
+
+  void setConstexprUnknown(bool IsConstexprUnknown = true) {
+    AllowConstexprUnknown = IsConstexprUnknown;
+  }
+
   /// Creates an empty APValue of type None.
-  APValue() : Kind(None) {}
+  APValue() : Kind(None), AllowConstexprUnknown(false) {}
   /// Creates an integer APValue holding the given value.
-  explicit APValue(APSInt I) : Kind(None) {
+  explicit APValue(APSInt I) : Kind(None), AllowConstexprUnknown(false) {
     MakeInt(); setInt(std::move(I));
   }
   /// Creates a float APValue holding the given value.
-  explicit APValue(APFloat F) : Kind(None) {
+  explicit APValue(APFloat F) : Kind(None), AllowConstexprUnknown(false) {
     MakeFloat(); setFloat(std::move(F));
   }
   /// Creates a fixed-point APValue holding the given value.
-  explicit APValue(APFixedPoint FX) : Kind(None) {
+  explicit APValue(APFixedPoint FX) : Kind(None), AllowConstexprUnknown(false) {
     MakeFixedPoint(std::move(FX));
   }
   /// Creates a vector APValue with \p N elements. The elements
   /// are read from \p E.
-  explicit APValue(const APValue *E, unsigned N) : Kind(None) {
+  explicit APValue(const APValue *E, unsigned N)
+      : Kind(None), AllowConstexprUnknown(false) {
     MakeVector(); setVector(E, N);
   }
   /// Creates an integer complex APValue with the given real and imaginary
   /// values.
-  APValue(APSInt R, APSInt I) : Kind(None) {
+  APValue(APSInt R, APSInt I) : Kind(None), AllowConstexprUnknown(false) {
     MakeComplexInt(); setComplexInt(std::move(R), std::move(I));
   }
   /// Creates a float complex APValue with the given real and imaginary values.
-  APValue(APFloat R, APFloat I) : Kind(None) {
+  APValue(APFloat R, APFloat I) : Kind(None), AllowConstexprUnknown(false) {
     MakeComplexFloat(); setComplexFloat(std::move(R), std::move(I));
   }
   APValue(const APValue &RHS);
@@ -348,7 +357,7 @@ class APValue {
   /// \param IsNullPtr Whether this lvalue is a null pointer.
   APValue(LValueBase Base, const CharUnits &Offset, NoLValuePath,
           bool IsNullPtr = false)
-      : Kind(None) {
+      : Kind(None), AllowConstexprUnknown(false) {
     MakeLValue();
     setLValue(Base, Offset, NoLValuePath{}, IsNullPtr);
   }
@@ -362,23 +371,36 @@ class APValue {
   APValue(LValueBase Base, const CharUnits &Offset,
           ArrayRef<LValuePathEntry> Path, bool OnePastTheEnd,
           bool IsNullPtr = false)
-      : Kind(None) {
+      : Kind(None), AllowConstexprUnknown(false) {
     MakeLValue();
     setLValue(Base, Offset, Path, OnePastTheEnd, IsNullPtr);
   }
+  /// Creates a constexpr unknown lvalue APValue.
+  /// \param Base The base of the lvalue.
+  /// \param Offset The offset of the lvalue.
+  /// \param IsNullPtr Whether this lvalue is a null pointer.
+  APValue(LValueBase Base, const CharUnits &Offset, ConstexprUnknown,
+          bool IsNullPtr = false)
+      : Kind(None), AllowConstexprUnknown(true) {
+    MakeLValue();
+    setLValue(Base, Offset, NoLValuePath{}, IsNullPtr);
+  }
+
   /// Creates a new array APValue.
   /// \param UninitArray Marker. Pass an empty UninitArray.
   /// \param InitElts Number of elements you're going to initialize in the
   /// array.
   /// \param Size Full size of the array.
-  APValue(UninitArray, unsigned InitElts, unsigned Size) : Kind(None) {
+  APValue(UninitArray, unsigned InitElts, unsigned Size)
+      : Kind(None), AllowConstexprUnknown(false) {
     MakeArray(InitElts, Size);
   }
   /// Creates a new struct APValue.
   /// \param UninitStruct Marker. Pass an empty UninitStruct.
   /// \param NumBases Number of bases.
   /// \param NumMembers Number of members.
-  APValue(UninitStruct, unsigned NumBases, unsigned NumMembers) : Kind(None) {
+  APValue(UninitStruct, unsigned NumBases, unsigned NumMembers)
+      : Kind(None), AllowConstexprUnknown(false) {
     MakeStruct(NumBases, NumMembers);
   }
   /// Creates a new union APValue.
@@ -386,7 +408,7 @@ class APValue {
   /// \param ActiveValue The value of the active union member.
   explicit APValue(const FieldDecl *ActiveDecl,
                    const APValue &ActiveValue = APValue())
-      : Kind(None) {
+      : Kind(None), AllowConstexprUnknown(false) {
     MakeUnion();
     setUnion(ActiveDecl, ActiveValue);
   }
@@ -395,14 +417,15 @@ class APValue {
   /// \param IsDerivedMember Whether member is a derived one.
   /// \param Path The path of the member.
   APValue(const ValueDecl *Member, bool IsDerivedMember,
-          ArrayRef<const CXXRecordDecl*> Path) : Kind(None) {
+          ArrayRef<const CXXRecordDecl *> Path)
+      : Kind(None), AllowConstexprUnknown(false) {
     MakeMemberPointer(Member, IsDerivedMember, Path);
   }
   /// Creates a new address label diff APValue.
   /// \param LHSExpr The left-hand side of the difference.
   /// \param RHSExpr The right-hand side of the difference.
-  APValue(const AddrLabelExpr* LHSExpr, const AddrLabelExpr* RHSExpr)
-      : Kind(None) {
+  APValue(const AddrLabelExpr *LHSExpr, const AddrLabelExpr *RHSExpr)
+      : Kind(None), AllowConstexprUnknown(false) {
     MakeAddrLabelDiff(); setAddrLabelDiff(LHSExpr, RHSExpr);
   }
   static APValue IndeterminateValue() {
diff --git a/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h b/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h
index 7442f4aad531b..3344959072c22 100644
--- a/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h
+++ b/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h
@@ -71,6 +71,10 @@ class ExprMutationAnalyzer {
     const Stmt *findReferenceMutation(const Expr *Exp);
     const Stmt *findFunctionArgMutation(const Expr *Exp);
 
+    const Stmt *findPointeeValueMutation(const Expr *Exp);
+    const Stmt *findPointeeMemberMutation(const Expr *Exp);
+    const Stmt *findPointeeToNonConst(const Expr *Exp);
+
     const Stmt &Stm;
     ASTContext &Context;
     Memoized &Memorized;
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index bbf4886b5cf05..60c360d4a9e07 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1977,16 +1977,16 @@ def AtomicNandFetch : AtomicBuiltin {
   let Prototype = "void(...)";
 }
 
-def AtomicTestAndSet : Builtin {
+def AtomicTestAndSet : AtomicBuiltin {
   let Spellings = ["__atomic_test_and_set"];
-  let Attributes = [NoThrow];
-  let Prototype = "bool(void volatile*, int)";
+  let Attributes = [NoThrow, CustomTypeChecking];
+  let Prototype = "bool(...)";
 }
 
-def AtomicClear : Builtin {
+def AtomicClear : AtomicBuiltin {
   let Spellings = ["__atomic_clear"];
-  let Attributes = [NoThrow];
-  let Prototype = "void(void volatile*, int)";
+  let Attributes = [NoThrow, CustomTypeChecking];
+  let Prototype = "void(...)";
 }
 
 def AtomicThreadFence : Builtin {
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def
index 969dd9e41ebfa..37b4e6ff77fda 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.def
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.def
@@ -28,7 +28,9 @@
 #pragma push_macro("SM_90")
 #pragma push_macro("SM_90a")
 #pragma push_macro("SM_100")
-#define SM_100 "sm_100"
+#pragma push_macro("SM_100a")
+#define SM_100a "sm_100a"
+#define SM_100 "sm_100|" SM_100a
 #define SM_90a "sm_90a"
 #define SM_90 "sm_90|" SM_90a "|" SM_100
 #define SM_89 "sm_89|" SM_90
@@ -1091,6 +1093,7 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78))
 #pragma pop_macro("SM_90")
 #pragma pop_macro("SM_90a")
 #pragma pop_macro("SM_100")
+#pragma pop_macro("SM_100a")
 #pragma pop_macro("PTX42")
 #pragma pop_macro("PTX60")
 #pragma pop_macro("PTX61")
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 18fc10eb85c02..a6c932967f528 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -4936,15 +4936,15 @@ let Features = "avx10.2-512,sm4", Attributes = [NoThrow, RequiredVectorWidth<512
 }
 
 let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
-  def vminmaxnepbf16128 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>, _Constant int)">;
+  def vminmaxbf16128 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>, _Constant int)">;
 }
 
 let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
-  def vminmaxnepbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>, _Constant int)">;
+  def vminmaxbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>, _Constant int)">;
 }
 
 let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
-  def vminmaxnepbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>, _Constant int)">;
+  def vminmaxbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>, _Constant int)">;
 }
 
 let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
index 0f4ed13d5f3d8..1ab8c7fb4d3c3 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -413,9 +413,6 @@ CODEGENOPT(StrictReturn, 1, 1)
 /// Whether emit pseudo probes for sample pgo profile collection.
 CODEGENOPT(PseudoProbeForProfiling, 1, 0)
 
-/// Whether 3-component vector type is preserved.
-CODEGENOPT(PreserveVec3Type, 1, 0)
-
 CODEGENOPT(NoPLT, 1, 0)
 
 /// Whether to emit all vtables
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index c2a4addf488df..f33ba46233a7a 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -44,9 +44,10 @@ enum class CudaVersion {
   CUDA_124,
   CUDA_125,
   CUDA_126,
+  CUDA_128,
   FULLY_SUPPORTED = CUDA_123,
   PARTIALLY_SUPPORTED =
-      CUDA_126, // Partially supported. Proceed with a warning.
+      CUDA_128, // Partially supported. Proceed with a warning.
   NEW = 10000,  // Too new. Issue a warning, but allow using it.
 };
 const char *CudaVersionToString(CudaVersion V);
@@ -80,6 +81,7 @@ enum class OffloadArch {
   SM_90,
   SM_90a,
   SM_100,
+  SM_100a,
   GFX600,
   GFX601,
   GFX602,
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index db54312ad965e..0175c20daf241 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -487,6 +487,10 @@ def err_noreturn_non_function : Error<
 def warn_qual_return_type : Warning<
   "'%0' type qualifier%s1 on return type %plural{1:has|:have}1 no effect">,
   InGroup<IgnoredQualifiers>, DefaultIgnore;
+def warn_qual_base_type : Warning<
+  "'%0' qualifier%s1 on base class type %2 have no effect">,
+  InGroup<IgnoredQualifiers>, DefaultIgnore;
+
 def warn_deprecated_redundant_constexpr_static_def : Warning<
   "out-of-line definition of constexpr static data member is redundant "
   "in C++17 and is deprecated">,
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
index 3b833240e5b68..a980be853d53e 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -532,6 +532,8 @@ BENIGN_LANGOPT(CheckConstexprFunctionBodies, 1, 1,
 
 LANGOPT(BoundsSafety, 1, 0, "Bounds safety extension for C")
 
+LANGOPT(PreserveVec3Type, 1, 0, "Preserve 3-component vector type")
+
 #undef LANGOPT
 #undef COMPATIBLE_LANGOPT
 #undef BENIGN_LANGOPT
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index ac1c139b20943..e7001bac450e8 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2280,15 +2280,15 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
 
 let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in {
   // ZIPQ1, ZIPQ2, UZPQ1, UZPQ2
-  def SVZIPQ1 : SInst<"svzipq1[_{d}]", "ddd", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_zipq1", [], []>;
-  def SVZIPQ2 : SInst<"svzipq2[_{d}]", "ddd", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_zipq2", [], []>;
-  def SVUZPQ1 : SInst<"svuzpq1[_{d}]", "ddd", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_uzpq1", [], []>;
-  def SVUZPQ2 : SInst<"svuzpq2[_{d}]", "ddd", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_uzpq2", [], []>;
+  def SVZIPQ1 : SInst<"svzipq1[_{d}]", "ddd", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_zipq1", [], []>;
+  def SVZIPQ2 : SInst<"svzipq2[_{d}]", "ddd", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_zipq2", [], []>;
+  def SVUZPQ1 : SInst<"svuzpq1[_{d}]", "ddd", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_uzpq1", [], []>;
+  def SVUZPQ2 : SInst<"svuzpq2[_{d}]", "ddd", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_uzpq2", [], []>;
   // TBLQ, TBXQ
-  def SVTBLQ : SInst<"svtblq[_{d}]", "ddu", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_tblq">;
-  def SVTBXQ : SInst<"svtbxq[_{d}]", "dddu", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_tbxq">;
+  def SVTBLQ : SInst<"svtblq[_{d}]", "ddu", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_tblq">;
+  def SVTBXQ : SInst<"svtbxq[_{d}]", "dddu", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_tbxq">;
   // EXTQ
-  def EXTQ : SInst<"svextq[_{d}]", "dddk", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_extq", [], [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
+  def EXTQ : SInst<"svextq[_{d}]", "dddk", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_extq", [], [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
 
   // PMOV
   // Move to Pred
@@ -2314,7 +2314,7 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in {
 
 let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2p1" in {
   // DUPQ
-  def SVDUP_LANEQ_B  : SInst<"svdup_laneq[_{d}]", "ddi",  "cUc", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_15>]>;
+  def SVDUP_LANEQ_B  : SInst<"svdup_laneq[_{d}]", "ddi",  "cUcm", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_15>]>;
   def SVDUP_LANEQ_H  : SInst<"svdup_laneq[_{d}]", "ddi",  "sUsh", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_7>]>;
   def SVDUP_LANEQ_S  : SInst<"svdup_laneq[_{d}]", "ddi",  "iUif", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_3>]>;
   def SVDUP_LANEQ_D  : SInst<"svdup_laneq[_{d}]", "ddi",  "lUld", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_1>]>;
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index c4b9743597bb2..852051e772fc1 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -8245,10 +8245,6 @@ def fhlsl_strict_availability : Flag<["-"], "fhlsl-strict-availability">,
   Group<hlsl_Group>,
   MarshallingInfoFlag<LangOpts<"HLSLStrictAvailability">>;
 
-def fpreserve_vec3_type : Flag<["-"], "fpreserve-vec3-type">,
-  HelpText<"Preserve 3-component vector type">,
-  MarshallingInfoFlag<CodeGenOpts<"PreserveVec3Type">>,
-  ImpliedByAnyOf<[hlsl.KeyPath]>;
 def fwchar_type_EQ : Joined<["-"], "fwchar-type=">,
   HelpText<"Select underlying type for wchar_t">,
   Values<"char,short,int">,
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index c31423841ec1a..fd526f189ec83 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -2676,19 +2676,6 @@ struct FormatStyle {
   /// \version 3.7
   bool ExperimentalAutoDetectBinPacking;
 
-  /// If ``true``, clang-format will indent the body of an ``export { ... }``
-  /// block. This doesn't affect the formatting of anything else related to
-  /// exported declarations.
-  /// \code
-  ///    true:                     false:
-  ///    export {          vs.     export {
-  ///      void foo();             void foo();
-  ///      void bar();             void bar();
-  ///    }                         }
-  /// \endcode
-  /// \version 20
-  bool ExportBlockIndentation;
-
   /// If ``true``, clang-format adds missing namespace end comments for
   /// namespaces and fixes invalid existing ones. This doesn't affect short
   /// namespaces, which are controlled by ``ShortNamespaceLines``.
@@ -2815,22 +2802,18 @@ struct FormatStyle {
   /// \version 3.3
   bool IndentCaseLabels;
 
-  /// Indent goto labels.
-  ///
-  /// When ``false``, goto labels are flushed left.
+  /// If ``true``, clang-format will indent the body of an ``export { ... }``
+  /// block. This doesn't affect the formatting of anything else related to
+  /// exported declarations.
   /// \code
-  ///    true:                                  false:
-  ///    int f() {                      vs.     int f() {
-  ///      if (foo()) {                           if (foo()) {
-  ///      label1:                              label1:
-  ///        bar();                                 bar();
-  ///      }                                      }
-  ///    label2:                                label2:
-  ///      return 1;                              return 1;
-  ///    }                                      }
+  ///    true:                     false:
+  ///    export {          vs.     export {
+  ///      void foo();             void foo();
+  ///      void bar();             void bar();
+  ///    }                         }
   /// \endcode
-  /// \version 10
-  bool IndentGotoLabels;
+  /// \version 20
+  bool IndentExportBlock;
 
   /// Indents extern blocks
   enum IndentExternBlockStyle : int8_t {
@@ -2872,6 +2855,23 @@ struct FormatStyle {
   /// \version 11
   IndentExternBlockStyle IndentExternBlock;
 
+  /// Indent goto labels.
+  ///
+  /// When ``false``, goto labels are flushed left.
+  /// \code
+  ///    true:                                  false:
+  ///    int f() {                      vs.     int f() {
+  ///      if (foo()) {                           if (foo()) {
+  ///      label1:                              label1:
+  ///        bar();                                 bar();
+  ///      }                                      }
+  ///    label2:                                label2:
+  ///      return 1;                              return 1;
+  ///    }                                      }
+  /// \endcode
+  /// \version 10
+  bool IndentGotoLabels;
+
   /// Options for indenting preprocessor directives.
   enum PPDirectiveIndentStyle : int8_t {
     /// Does not indent any directives.
@@ -5267,7 +5267,6 @@ struct FormatStyle {
            EmptyLineBeforeAccessModifier == R.EmptyLineBeforeAccessModifier &&
            ExperimentalAutoDetectBinPacking ==
                R.ExperimentalAutoDetectBinPacking &&
-           ExportBlockIndentation == R.ExportBlockIndentation &&
            FixNamespaceComments == R.FixNamespaceComments &&
            ForEachMacros == R.ForEachMacros &&
            IncludeStyle.IncludeBlocks == R.IncludeStyle.IncludeBlocks &&
@@ -5280,6 +5279,7 @@ struct FormatStyle {
            IndentAccessModifiers == R.IndentAccessModifiers &&
            IndentCaseBlocks == R.IndentCaseBlocks &&
            IndentCaseLabels == R.IndentCaseLabels &&
+           IndentExportBlock == R.IndentExportBlock &&
            IndentExternBlock == R.IndentExternBlock &&
            IndentGotoLabels == R.IndentGotoLabels &&
            IndentPPDirectives == R.IndentPPDirectives &&
diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h
index 82a041ea3f848..89c8ae354dafc 100644
--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@@ -557,6 +557,12 @@ class Lexer : public PreprocessorLexer {
                                             const LangOptions &LangOpts,
                                             bool IncludeComments = false);
 
+  /// Finds the token that comes before the given location.
+  static std::optional<Token> findPreviousToken(SourceLocation Loc,
+                                                const SourceManager &SM,
+                                                const LangOptions &LangOpts,
+                                                bool IncludeComments);
+
   /// Checks that the given token is the first token that occurs after
   /// the given location (this excludes comments and whitespace). Returns the
   /// location immediately after the specified token. If the token is not found
diff --git a/clang/include/clang/Sema/SemaCodeCompletion.h b/clang/include/clang/Sema/SemaCodeCompletion.h
index 50409439389b0..e931596c215d3 100644
--- a/clang/include/clang/Sema/SemaCodeCompletion.h
+++ b/clang/include/clang/Sema/SemaCodeCompletion.h
@@ -23,6 +23,7 @@
 #include "clang/Sema/CodeCompleteConsumer.h"
 #include "clang/Sema/DeclSpec.h"
 #include "clang/Sema/Designator.h"
+#include "clang/Sema/HeuristicResolver.h"
 #include "clang/Sema/Ownership.h"
 #include "clang/Sema/SemaBase.h"
 #include "llvm/ADT/StringRef.h"
@@ -43,6 +44,7 @@ class SemaCodeCompletion : public SemaBase {
 
   /// Code-completion consumer.
   CodeCompleteConsumer *CodeCompleter;
+  HeuristicResolver Resolver;
 
   /// Describes the context in which code completion occurs.
   enum ParserCompletionContext {
diff --git a/clang/lib/AST/APValue.cpp b/clang/lib/AST/APValue.cpp
index f9e08b70d6ab0..3b814be266330 100644
--- a/clang/lib/AST/APValue.cpp
+++ b/clang/lib/AST/APValue.cpp
@@ -308,7 +308,8 @@ APValue::UnionData::~UnionData () {
   delete Value;
 }
 
-APValue::APValue(const APValue &RHS) : Kind(None) {
+APValue::APValue(const APValue &RHS)
+    : Kind(None), AllowConstexprUnknown(RHS.AllowConstexprUnknown) {
   switch (RHS.getKind()) {
   case None:
   case Indeterminate:
@@ -379,13 +380,17 @@ APValue::APValue(const APValue &RHS) : Kind(None) {
   }
 }
 
-APValue::APValue(APValue &&RHS) : Kind(RHS.Kind), Data(RHS.Data) {
+APValue::APValue(APValue &&RHS)
+    : Kind(RHS.Kind), AllowConstexprUnknown(RHS.AllowConstexprUnknown),
+      Data(RHS.Data) {
   RHS.Kind = None;
 }
 
 APValue &APValue::operator=(const APValue &RHS) {
   if (this != &RHS)
     *this = APValue(RHS);
+
+  AllowConstexprUnknown = RHS.AllowConstexprUnknown;
   return *this;
 }
 
@@ -395,6 +400,7 @@ APValue &APValue::operator=(APValue &&RHS) {
       DestroyDataAndMakeUninit();
     Kind = RHS.Kind;
     Data = RHS.Data;
+    AllowConstexprUnknown = RHS.AllowConstexprUnknown;
     RHS.Kind = None;
   }
   return *this;
@@ -426,6 +432,7 @@ void APValue::DestroyDataAndMakeUninit() {
   else if (Kind == AddrLabelDiff)
     ((AddrLabelDiffData *)(char *)&Data)->~AddrLabelDiffData();
   Kind = None;
+  AllowConstexprUnknown = false;
 }
 
 bool APValue::needsCleanup() const {
@@ -468,6 +475,10 @@ bool APValue::needsCleanup() const {
 void APValue::swap(APValue &RHS) {
   std::swap(Kind, RHS.Kind);
   std::swap(Data, RHS.Data);
+  // std::swap cannot bind a reference to a bit-field, so swap it by hand.
+  bool tmp = AllowConstexprUnknown;
+  AllowConstexprUnknown = RHS.AllowConstexprUnknown;
+  RHS.AllowConstexprUnknown = tmp;
 }
 
 /// Profile the value of an APInt, excluding its bit-width.
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 155dbcfcaeed3..a4ba9fd055346 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -3503,6 +3503,34 @@ uint16_t ASTContext::getPointerAuthTypeDiscriminator(QualType T) {
     encodeTypeForFunctionPointerAuth(*this, Out, T);
   } else {
     T = T.getUnqualifiedType();
+    // Calls to member function pointers don't need to worry about
+    // language interop or the laxness of the C type compatibility rules.
+    // We just mangle the member pointer type directly, which is
+    // implicitly much stricter about type matching. However, we do
+    // strip any top-level exception specification before this mangling.
+    // C++23 requires calls to work when the function type is convertible
+    // to the pointer type by a function pointer conversion, which can
+    // change the exception specification. This does not technically
+    // require the exception specification to not affect representation,
+    // because the function pointer conversion is still always a direct
+    // value conversion and therefore an opportunity to resign the
+    // pointer. (This is in contrast to e.g. qualification conversions,
+    // which can be applied in nested pointer positions, effectively
+    // requiring qualified and unqualified representations to match.)
+    // However, it is pragmatic to ignore exception specifications
+    // because it allows a certain amount of `noexcept` mismatching
+    // to not become a visible ODR problem. This also leaves some
+    // room for the committee to add laxness to function pointer
+    // conversions in future standards.
+    if (auto *MPT = T->getAs<MemberPointerType>())
+      if (MPT->isMemberFunctionPointer()) {
+        QualType PointeeType = MPT->getPointeeType();
+        if (PointeeType->castAs<FunctionProtoType>()->getExceptionSpecType() !=
+            EST_None) {
+          QualType FT = getFunctionTypeWithExceptionSpec(PointeeType, EST_None);
+          T = getMemberPointerType(FT, MPT->getClass());
+        }
+      }
     std::unique_ptr<MangleContext> MC(createMangleContext());
     MC->mangleCanonicalTypeName(T, Out);
   }
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index f641a72ed2644..a1a51d38b93e1 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -4003,7 +4003,7 @@ const IdentifierInfo *FunctionDecl::getLiteralIdentifier() const {
 FunctionDecl::TemplatedKind FunctionDecl::getTemplatedKind() const {
   if (TemplateOrSpecialization.isNull())
     return TK_NonTemplate;
-  if (const auto *ND = TemplateOrSpecialization.dyn_cast<NamedDecl *>()) {
+  if (const auto *ND = dyn_cast<NamedDecl *>(TemplateOrSpecialization)) {
     if (isa<FunctionDecl>(ND))
       return TK_DependentNonTemplate;
     assert(isa<FunctionTemplateDecl>(ND) &&
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index f6a4ed970cb23..31b95bca613c2 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -5077,6 +5077,8 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
   case AO__opencl_atomic_init:
   case AO__c11_atomic_load:
   case AO__atomic_load_n:
+  case AO__atomic_test_and_set:
+  case AO__atomic_clear:
     return 2;
 
   case AO__scoped_atomic_load_n:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 2e680d1569f60..734311e5d8b9a 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -572,6 +572,7 @@ namespace {
     typedef std::map<MapKeyTy, APValue> MapTy;
     /// Temporaries - Temporary lvalues materialized within this stack frame.
     MapTy Temporaries;
+    MapTy ConstexprUnknownAPValues;
 
     /// CallRange - The source range of the call expression for this call.
     SourceRange CallRange;
@@ -646,6 +647,9 @@ namespace {
     APValue &createTemporary(const KeyT *Key, QualType T,
                              ScopeKind Scope, LValue &LV);
 
+    APValue &createConstexprUnknownAPValues(const VarDecl *Key,
+                                            APValue::LValueBase Base);
+
     /// Allocate storage for a parameter of a function call made in this frame.
     APValue &createParam(CallRef Args, const ParmVarDecl *PVD, LValue &LV);
 
@@ -1630,8 +1634,11 @@ namespace {
     SubobjectDesignator Designator;
     bool IsNullPtr : 1;
     bool InvalidBase : 1;
+    // P2280R4: tracks whether this is an unknown reference or pointer.
+    bool AllowConstexprUnknown = false;
 
     const APValue::LValueBase getLValueBase() const { return Base; }
+    bool allowConstexprUnknown() const { return AllowConstexprUnknown; }
     CharUnits &getLValueOffset() { return Offset; }
     const CharUnits &getLValueOffset() const { return Offset; }
     SubobjectDesignator &getLValueDesignator() { return Designator; }
@@ -1649,6 +1656,8 @@ namespace {
         V = APValue(Base, Offset, Designator.Entries,
                     Designator.IsOnePastTheEnd, IsNullPtr);
       }
+      if (AllowConstexprUnknown)
+        V.setConstexprUnknown();
     }
     void setFrom(ASTContext &Ctx, const APValue &V) {
       assert(V.isLValue() && "Setting LValue from a non-LValue?");
@@ -1657,6 +1666,7 @@ namespace {
       InvalidBase = false;
       Designator = SubobjectDesignator(Ctx, V);
       IsNullPtr = V.isNullPointer();
+      AllowConstexprUnknown = V.allowConstexprUnknown();
     }
 
     void set(APValue::LValueBase B, bool BInvalid = false) {
@@ -1674,6 +1684,7 @@ namespace {
       InvalidBase = BInvalid;
       Designator = SubobjectDesignator(getType(B));
       IsNullPtr = false;
+      AllowConstexprUnknown = false;
     }
 
     void setNull(ASTContext &Ctx, QualType PointerTy) {
@@ -1683,6 +1694,7 @@ namespace {
       InvalidBase = false;
       Designator = SubobjectDesignator(PointerTy->getPointeeType());
       IsNullPtr = true;
+      AllowConstexprUnknown = false;
     }
 
     void setInvalid(APValue::LValueBase B, unsigned I = 0) {
@@ -1944,6 +1956,15 @@ APValue &CallStackFrame::createTemporary(const KeyT *Key, QualType T,
   return createLocal(Base, Key, T, Scope);
 }
 
+APValue &
+CallStackFrame::createConstexprUnknownAPValues(const VarDecl *Key,
+                                               APValue::LValueBase Base) {
+  APValue &Result = ConstexprUnknownAPValues[MapKeyTy(Key, Base.getVersion())];
+  Result = APValue(Base, CharUnits::One(), APValue::ConstexprUnknown{});
+
+  return Result;
+}
+
 /// Allocate storage for a parameter of a function call made in this frame.
 APValue &CallStackFrame::createParam(CallRef Args, const ParmVarDecl *PVD,
                                      LValue &LV) {
@@ -3446,6 +3467,11 @@ static bool HandleLValueVectorElement(EvalInfo &Info, const Expr *E,
 static bool evaluateVarDeclInit(EvalInfo &Info, const Expr *E,
                                 const VarDecl *VD, CallStackFrame *Frame,
                                 unsigned Version, APValue *&Result) {
+  // C++23 [expr.const]p8 If we have a reference type allow unknown references
+  // and pointers.
+  bool AllowConstexprUnknown =
+      Info.getLangOpts().CPlusPlus23 && VD->getType()->isReferenceType();
+
   APValue::LValueBase Base(VD, Frame ? Frame->Index : 0, Version);
 
   // If this is a local variable, dig out its value.
@@ -3480,7 +3506,11 @@ static bool evaluateVarDeclInit(EvalInfo &Info, const Expr *E,
     return true;
   }
 
-  if (isa<ParmVarDecl>(VD)) {
+  // P2280R4 struck the restriction that the lifetime of a variable of
+  // reference type must begin within the evaluation of E.
+  // Used to be C++20 [expr.const]p5.12.2:
+  // ... its lifetime began within the evaluation of E;
+  if (isa<ParmVarDecl>(VD) && !AllowConstexprUnknown) {
     // Assume parameters of a potential constant expression are usable in
     // constant expressions.
     if (!Info.checkingPotentialConstantExpression() ||
@@ -3504,7 +3534,11 @@ static bool evaluateVarDeclInit(EvalInfo &Info, const Expr *E,
   // FIXME: We should eventually check whether the variable has a reachable
   // initializing declaration.
   const Expr *Init = VD->getAnyInitializer(VD);
-  if (!Init) {
+  // P2280R4 struck the restriction that variable of reference type should have
+  // a preceding initialization.
+  // Used to be C++20 [expr.const]p5.12:
+  //   ... reference has a preceding initialization and either ...
+  if (!Init && !AllowConstexprUnknown) {
     // Don't diagnose during potential constant expression checking; an
     // initializer might be added later.
     if (!Info.checkingPotentialConstantExpression()) {
@@ -3515,7 +3549,11 @@ static bool evaluateVarDeclInit(EvalInfo &Info, const Expr *E,
     return false;
   }
 
-  if (Init->isValueDependent()) {
+  // P2280R4 struck the initialization requirement for variables of reference
+  // type so we can no longer assume we have an Init.
+  // Used to be C++20 [expr.const]p5.12:
+  //  ... reference has a preceding initialization and either ...
+  if (Init && Init->isValueDependent()) {
     // The DeclRefExpr is not value-dependent, but the variable it refers to
     // has a value-dependent initializer. This should only happen in
     // constant-folding cases, where the variable is not actually of a suitable
@@ -3534,7 +3572,15 @@ static bool evaluateVarDeclInit(EvalInfo &Info, const Expr *E,
 
   // Check that we can fold the initializer. In C++, we will have already done
   // this in the cases where it matters for conformance.
-  if (!VD->evaluateValue()) {
+  // P2280R4 struck the initialization requirement for variables of reference
+  // type so we can no longer assume we have an Init.
+  // Used to be C++20 [expr.const]p5.12:
+  //  ... reference has a preceding initialization and either ...
+  if (Init && !VD->evaluateValue()) {
+    if (AllowConstexprUnknown) {
+      Result = &Info.CurrentCall->createConstexprUnknownAPValues(VD, Base);
+      return true;
+    }
     Info.FFDiag(E, diag::note_constexpr_var_init_non_constant, 1) << VD;
     NoteLValueLocation(Info, Base);
     return false;
@@ -3566,6 +3612,20 @@ static bool evaluateVarDeclInit(EvalInfo &Info, const Expr *E,
   }
 
   Result = VD->getEvaluatedValue();
+
+  // C++23 [expr.const]p8
+  // ... For such an object that is not usable in constant expressions, the
+  // dynamic type of the object is constexpr-unknown. For such a reference that
+  // is not usable in constant expressions, the reference is treated as binding
+  // to an unspecified object of the referenced type whose lifetime and that of
+  // all subobjects includes the entire constant evaluation and whose dynamic
+  // type is constexpr-unknown.
+  if (AllowConstexprUnknown) {
+    if (!Result)
+      Result = &Info.CurrentCall->createConstexprUnknownAPValues(VD, Base);
+    else
+      Result->setConstexprUnknown();
+  }
   return true;
 }
 
@@ -3847,6 +3907,11 @@ findSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj,
   const FieldDecl *LastField = nullptr;
   const FieldDecl *VolatileField = nullptr;
 
+  // C++23 [expr.const]p8: if we have an unknown reference or pointer that
+  // does not have a value, bail out.
+  if (O->allowConstexprUnknown() && !O->hasValue())
+    return false;
+
   // Walk the designator's path to find the subobject.
   for (unsigned I = 0, N = Sub.Entries.size(); /**/; ++I) {
     // Reading an indeterminate value is undefined, but assigning over one is OK.
@@ -5909,6 +5974,15 @@ struct CheckDynamicTypeHandler {
 /// dynamic type.
 static bool checkDynamicType(EvalInfo &Info, const Expr *E, const LValue &This,
                              AccessKinds AK, bool Polymorphic) {
+  // We are not allowed to invoke a virtual function for an object whose
+  // dynamic type is constexpr-unknown, so stop early and let this fail later
+  // on if we attempt to do so.
+  // C++23 [expr.const]p5.6
+  // an invocation of a virtual function ([class.virtual]) for an object whose
+  // dynamic type is constexpr-unknown;
+  if (This.allowConstexprUnknown())
+    return true;
+
   if (This.Designator.Invalid)
     return false;
 
@@ -5981,7 +6055,13 @@ static std::optional<DynamicType> ComputeDynamicType(EvalInfo &Info,
   // If we don't have an lvalue denoting an object of class type, there is no
   // meaningful dynamic type. (We consider objects of non-class type to have no
   // dynamic type.)
-  if (!checkDynamicType(Info, E, This, AK, true))
+  if (!checkDynamicType(Info, E, This, AK,
+                        (AK == AK_TypeId
+                             ? (E->getType()->isReferenceType() ? true : false)
+                             : true)))
+    return std::nullopt;
+
+  if (This.Designator.Invalid)
     return std::nullopt;
 
   // Refuse to compute a dynamic type in the presence of virtual bases. This
@@ -8749,7 +8829,8 @@ static bool HandleLambdaCapture(EvalInfo &Info, const Expr *E, LValue &Result,
     const ParmVarDecl *Self = MD->getParamDecl(0);
     if (Self->getType()->isReferenceType()) {
       APValue *RefValue = Info.getParamSlot(Info.CurrentCall->Arguments, Self);
-      Result.setFrom(Info.Ctx, *RefValue);
+      if (!RefValue->allowConstexprUnknown() || RefValue->hasValue())
+        Result.setFrom(Info.Ctx, *RefValue);
     } else {
       const ParmVarDecl *VD = Info.CurrentCall->Arguments.getOrigParam(Self);
       CallStackFrame *Frame =
@@ -8805,7 +8886,10 @@ bool LValueExprEvaluator::VisitDeclRefExpr(const DeclRefExpr *E) {
 
 
 bool LValueExprEvaluator::VisitVarDecl(const Expr *E, const VarDecl *VD) {
-
+  // C++23 [expr.const]p8 If we have a reference type allow unknown references
+  // and pointers.
+  bool AllowConstexprUnknown =
+      Info.getLangOpts().CPlusPlus23 && VD->getType()->isReferenceType();
   // If we are within a lambda's call operator, check whether the 'VD' referred
   // to within 'E' actually represents a lambda-capture that maps to a
   // data-member/field within the closure object, and if so, evaluate to the
@@ -8875,10 +8959,24 @@ bool LValueExprEvaluator::VisitVarDecl(const Expr *E, const VarDecl *VD) {
   if (!V->hasValue()) {
     // FIXME: Is it possible for V to be indeterminate here? If so, we should
     // adjust the diagnostic to say that.
-    if (!Info.checkingPotentialConstantExpression())
+    // C++23 [expr.const]p8: a variable that is an unknown reference or
+    // pointer may not have a value but still be usable later on, so do not
+    // diagnose.
+    if (!Info.checkingPotentialConstantExpression() && !AllowConstexprUnknown)
       Info.FFDiag(E, diag::note_constexpr_use_uninit_reference);
+
+    // C++23 [expr.const]p8: if the variable is an unknown reference or pointer,
+    // try to recover it from the frame and set the result accordingly.
+    if (VD->getType()->isReferenceType() && AllowConstexprUnknown) {
+      if (Frame) {
+        Result.set({VD, Frame->Index, Version});
+        return true;
+      }
+      return Success(VD);
+    }
     return false;
   }
+
   return Success(*V, E);
 }
 
@@ -11882,7 +11980,10 @@ class IntExprEvaluator
   }
 
   bool Success(const APValue &V, const Expr *E) {
-    if (V.isLValue() || V.isAddrLabelDiff() || V.isIndeterminate()) {
+    // C++23 [expr.const]p8 If we have a variable that is unknown reference or
+    // pointer allow further evaluation of the value.
+    if (V.isLValue() || V.isAddrLabelDiff() || V.isIndeterminate() ||
+        V.allowConstexprUnknown()) {
       Result = V;
       return true;
     }
@@ -12597,6 +12698,10 @@ static bool determineEndOffset(EvalInfo &Info, SourceLocation ExprLoc,
   auto CheckedHandleSizeof = [&](QualType Ty, CharUnits &Result) {
     if (Ty.isNull() || Ty->isIncompleteType() || Ty->isFunctionType())
       return false;
+
+    if (Ty->isReferenceType())
+      Ty = Ty.getNonReferenceType();
+
     return HandleSizeof(Info, ExprLoc, Ty, Result);
   };
 
@@ -14266,6 +14371,12 @@ EvaluateComparisonBinaryOperator(EvalInfo &Info, const BinaryOperator *E,
     if (!EvaluatePointer(E->getRHS(), RHSValue, Info) || !LHSOK)
       return false;
 
+    // If we have Unknown pointers we should fail if they are not global values.
+    if (!(IsGlobalLValue(LHSValue.getLValueBase()) &&
+          IsGlobalLValue(RHSValue.getLValueBase())) &&
+        (LHSValue.AllowConstexprUnknown || RHSValue.AllowConstexprUnknown))
+      return false;
+
     // Reject differing bases from the normal codepath; we special-case
     // comparisons to null.
     if (!HasSameBase(LHSValue, RHSValue)) {
@@ -17859,6 +17970,9 @@ std::optional<bool> EvaluateBuiltinIsWithinLifetime(IntExprEvaluator &IEE,
   if (!EvaluatePointer(Arg, Val, Info))
     return std::nullopt;
 
+  if (Val.allowConstexprUnknown())
+    return true;
+
   auto Error = [&](int Diag) {
     bool CalledFromStd = false;
     const auto *Callee = Info.CurrentCall->getCallee();
diff --git a/clang/lib/Analysis/ExprMutationAnalyzer.cpp b/clang/lib/Analysis/ExprMutationAnalyzer.cpp
index cefe64409c977..d7b44149d0fc4 100644
--- a/clang/lib/Analysis/ExprMutationAnalyzer.cpp
+++ b/clang/lib/Analysis/ExprMutationAnalyzer.cpp
@@ -8,8 +8,10 @@
 #include "clang/Analysis/Analyses/ExprMutationAnalyzer.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/OperationKinds.h"
+#include "clang/AST/Stmt.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/ASTMatchers/ASTMatchersMacros.h"
 #include "llvm/ADT/STLExtras.h"
 
 namespace clang {
@@ -22,7 +24,6 @@ using namespace ast_matchers;
 //  - ConditionalOperator
 //  - BinaryConditionalOperator
 static bool canExprResolveTo(const Expr *Source, const Expr *Target) {
-
   const auto IgnoreDerivedToBase = [](const Expr *E, auto Matcher) {
     if (Matcher(E))
       return true;
@@ -79,6 +80,8 @@ static bool canExprResolveTo(const Expr *Source, const Expr *Target) {
 
 namespace {
 
+AST_MATCHER(Type, isDependentType) { return Node.isDependentType(); }
+
 AST_MATCHER_P(LambdaExpr, hasCaptureInit, const Expr *, E) {
   return llvm::is_contained(Node.capture_inits(), E);
 }
@@ -99,6 +102,59 @@ AST_MATCHER_P(Stmt, canResolveToExpr, const Stmt *, Inner) {
   return canExprResolveTo(Exp, Target);
 }
 
+// Checks whether an expression can resolve to the target T. T is kept in a
+// class member to reduce stack usage and avoid stack overflow when recursing.
+class ExprPointeeResolve {
+  const Expr *T; // target expression to resolve to
+
+  bool resolveExpr(const Expr *E) {
+    if (E == nullptr)
+      return false;
+    if (E == T)
+      return true;
+
+    if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
+      if (BO->isAdditiveOp())
+        return (resolveExpr(BO->getLHS()) || resolveExpr(BO->getRHS()));
+      if (BO->isCommaOp())
+        return resolveExpr(BO->getRHS());
+      return false;
+    }
+
+    if (const auto *PE = dyn_cast<ParenExpr>(E))
+      return resolveExpr(PE->getSubExpr());
+
+    if (const auto *ICE = dyn_cast<ImplicitCastExpr>(E)) {
+      // Only implicit casts need to be treated as resolvable here;
+      // explicit casts are checked in `findPointeeToNonConst`.
+      const CastKind kind = ICE->getCastKind();
+      if (kind == CK_LValueToRValue || kind == CK_DerivedToBase ||
+          kind == CK_UncheckedDerivedToBase)
+        return resolveExpr(ICE->getSubExpr());
+      return false;
+    }
+
+    if (const auto *ACE = dyn_cast<AbstractConditionalOperator>(E))
+      return resolve(ACE->getTrueExpr()) || resolve(ACE->getFalseExpr());
+
+    return false;
+  }
+
+public:
+  ExprPointeeResolve(const Expr *T) : T(T) {}
+  bool resolve(const Expr *S) { return resolveExpr(S); }
+};
+
+AST_MATCHER_P(Stmt, canResolveToExprPointee, const Stmt *, T) {
+  auto *Exp = dyn_cast<Expr>(&Node);
+  if (!Exp)
+    return true;
+  auto *Target = dyn_cast<Expr>(T);
+  if (!Target)
+    return false;
+  return ExprPointeeResolve{Target}.resolve(Exp);
+}
+
 // Similar to 'hasAnyArgument', but does not work because 'InitListExpr' does
 // not have the 'arguments()' method.
 AST_MATCHER_P(InitListExpr, hasAnyInit, ast_matchers::internal::Matcher<Expr>,
@@ -208,7 +264,14 @@ const Stmt *ExprMutationAnalyzer::Analyzer::findMutation(const Decl *Dec) {
 
 const Stmt *
 ExprMutationAnalyzer::Analyzer::findPointeeMutation(const Expr *Exp) {
-  return findMutationMemoized(Exp, {/*TODO*/}, Memorized.PointeeResults);
+  return findMutationMemoized(
+      Exp,
+      {
+          &ExprMutationAnalyzer::Analyzer::findPointeeValueMutation,
+          &ExprMutationAnalyzer::Analyzer::findPointeeMemberMutation,
+          &ExprMutationAnalyzer::Analyzer::findPointeeToNonConst,
+      },
+      Memorized.PointeeResults);
 }
 
 const Stmt *
@@ -377,7 +440,8 @@ ExprMutationAnalyzer::Analyzer::findDirectMutation(const Expr *Exp) {
   // references.
   const auto NonConstRefParam = forEachArgumentWithParamType(
       anyOf(canResolveToExpr(Exp),
-            memberExpr(hasObjectExpression(canResolveToExpr(Exp)))),
+            memberExpr(
+                hasObjectExpression(ignoringImpCasts(canResolveToExpr(Exp))))),
       nonConstReferenceType());
   const auto NotInstantiated = unless(hasDeclaration(isInstantiated()));
 
@@ -643,6 +707,83 @@ ExprMutationAnalyzer::Analyzer::findFunctionArgMutation(const Expr *Exp) {
   return nullptr;
 }
 
+const Stmt *
+ExprMutationAnalyzer::Analyzer::findPointeeValueMutation(const Expr *Exp) {
+  const auto Matches = match(
+      stmt(forEachDescendant(
+          expr(anyOf(
+                   // dereference via unary '*'
+                   unaryOperator(hasOperatorName("*"),
+                                 hasUnaryOperand(canResolveToExprPointee(Exp))),
+                   // dereference via array subscript '[]'
+                   arraySubscriptExpr(hasBase(canResolveToExprPointee(Exp)))))
+              .bind(NodeID<Expr>::value))),
+      Stm, Context);
+  return findExprMutation(Matches);
+}
+
+const Stmt *
+ExprMutationAnalyzer::Analyzer::findPointeeMemberMutation(const Expr *Exp) {
+  const Stmt *MemberCallExpr = selectFirst<Stmt>(
+      "stmt", match(stmt(forEachDescendant(
+                        cxxMemberCallExpr(on(canResolveToExprPointee(Exp)),
+                                          unless(isConstCallee()))
+                            .bind("stmt"))),
+                    Stm, Context));
+  if (MemberCallExpr)
+    return MemberCallExpr;
+  const auto Matches =
+      match(stmt(forEachDescendant(
+                memberExpr(hasObjectExpression(canResolveToExprPointee(Exp)))
+                    .bind(NodeID<Expr>::value))),
+            Stm, Context);
+  return findExprMutation(Matches);
+}
+
+const Stmt *
+ExprMutationAnalyzer::Analyzer::findPointeeToNonConst(const Expr *Exp) {
+  const auto NonConstPointerOrDependentType =
+      type(anyOf(nonConstPointerType(), isDependentType()));
+
+  // Initialized into or assigned to a non-const pointer (or dependent type).
+  const auto InitToNonConst =
+      varDecl(hasType(NonConstPointerOrDependentType),
+              hasInitializer(expr(canResolveToExprPointee(Exp)).bind("stmt")));
+  const auto AssignToNonConst =
+      binaryOperation(hasOperatorName("="),
+                      hasLHS(expr(hasType(NonConstPointerOrDependentType))),
+                      hasRHS(canResolveToExprPointee(Exp)));
+  // Passed as an argument to a call-like expression.
+  const auto ArgOfInstantiationDependent = allOf(
+      hasAnyArgument(canResolveToExprPointee(Exp)), isInstantiationDependent());
+  const auto ArgOfNonConstParameter = forEachArgumentWithParamType(
+      canResolveToExprPointee(Exp), NonConstPointerOrDependentType);
+  const auto CallLikeMatcher =
+      anyOf(ArgOfNonConstParameter, ArgOfInstantiationDependent);
+  const auto PassAsNonConstArg =
+      expr(anyOf(cxxUnresolvedConstructExpr(ArgOfInstantiationDependent),
+                 cxxConstructExpr(CallLikeMatcher), callExpr(CallLikeMatcher),
+                 parenListExpr(has(canResolveToExprPointee(Exp))),
+                 initListExpr(hasAnyInit(canResolveToExprPointee(Exp)))));
+  // Explicitly cast to a non-const pointer (or dependent type).
+  const auto CastToNonConst =
+      explicitCastExpr(hasSourceExpression(canResolveToExprPointee(Exp)),
+                       hasDestinationType(NonConstPointerOrDependentType));
+
+  // Captured by a lambda.
+  // FIXME: false positive if the pointee does not change in lambda
+  const auto CaptureNoConst = lambdaExpr(hasCaptureInit(Exp));
+
+  const auto Matches =
+      match(stmt(anyOf(forEachDescendant(
+                           stmt(anyOf(AssignToNonConst, PassAsNonConstArg,
+                                      CastToNonConst, CaptureNoConst))
+                               .bind("stmt")),
+                       forEachDescendant(InitToNonConst))),
+            Stm, Context);
+  return selectFirst<Stmt>("stmt", Matches);
+}
+
 FunctionParmMutationAnalyzer::FunctionParmMutationAnalyzer(
     const FunctionDecl &Func, ASTContext &Context,
     ExprMutationAnalyzer::Memoized &Memorized)
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index d56609a2a8f24..1bfec0b37c5ee 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -44,6 +44,7 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = {
     CUDA_ENTRY(12, 4),
     CUDA_ENTRY(12, 5),
     CUDA_ENTRY(12, 6),
+    CUDA_ENTRY(12, 8),
     {"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits<int>::max())},
     {"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone.
 };
@@ -98,6 +99,7 @@ static const OffloadArchToStringMap arch_names[] = {
     SM(90),                          // Hopper
     SM(90a),                         // Hopper
     SM(100),                         // Blackwell
+    SM(100a),                        // Blackwell
     GFX(600),  // gfx600
     GFX(601),  // gfx601
     GFX(602),  // gfx602
@@ -227,8 +229,8 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) {
   case OffloadArch::SM_90a:
     return CudaVersion::CUDA_120;
   case OffloadArch::SM_100:
-    return CudaVersion::NEW; // TODO: use specific CUDA version once it's
-                             // public.
+  case OffloadArch::SM_100a:
+    return CudaVersion::CUDA_128;
   default:
     llvm_unreachable("invalid enum");
   }
diff --git a/clang/lib/Basic/DiagnosticIDs.cpp b/clang/lib/Basic/DiagnosticIDs.cpp
index 81194bbf2538e..de1de6f61f3a1 100644
--- a/clang/lib/Basic/DiagnosticIDs.cpp
+++ b/clang/lib/Basic/DiagnosticIDs.cpp
@@ -16,6 +16,7 @@
 #include "clang/Basic/SourceManager.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringTable.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Path.h"
 #include <map>
@@ -576,11 +577,7 @@ namespace {
     uint16_t SubGroups;
     StringRef Documentation;
 
-    // String is stored with a pascal-style length byte.
-    StringRef getName() const {
-      return StringRef(DiagGroupNames + NameOffset + 1,
-                       DiagGroupNames[NameOffset]);
-    }
+    StringRef getName() const { return DiagGroupNames[NameOffset]; }
   };
 }
 
@@ -627,11 +624,12 @@ StringRef DiagnosticIDs::getWarningOptionForDiag(unsigned DiagID) {
 
 std::vector<std::string> DiagnosticIDs::getDiagnosticFlags() {
   std::vector<std::string> Res{"-W", "-Wno-"};
-  for (size_t I = 1; DiagGroupNames[I] != '\0';) {
-    std::string Diag(DiagGroupNames + I + 1, DiagGroupNames[I]);
-    I += DiagGroupNames[I] + 1;
-    Res.push_back("-W" + Diag);
-    Res.push_back("-Wno-" + Diag);
+  for (StringRef Name : DiagGroupNames) {
+    if (Name.empty())
+      continue;
+
+    Res.push_back((Twine("-W") + Name).str());
+    Res.push_back((Twine("-Wno-") + Name).str());
   }
 
   return Res;
diff --git a/clang/lib/Basic/LangOptions.cpp b/clang/lib/Basic/LangOptions.cpp
index 94caf6a3897bc..e3037ec819add 100644
--- a/clang/lib/Basic/LangOptions.cpp
+++ b/clang/lib/Basic/LangOptions.cpp
@@ -208,6 +208,8 @@ void LangOptions::setLangDefaults(LangOptions &Opts, Language Lang,
 
   // OpenCL and HLSL have half keyword
   Opts.Half = Opts.OpenCL || Opts.HLSL;
+
+  Opts.PreserveVec3Type = Opts.HLSL;
 }
 
 FPOptions FPOptions::defaultWithoutTrailingStorage(const LangOptions &LO) {
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index dbc3fec365761..56efad90cb7c8 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -285,6 +285,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
       case OffloadArch::SM_90a:
         return "900";
       case OffloadArch::SM_100:
+      case OffloadArch::SM_100a:
         return "1000";
       }
       llvm_unreachable("unhandled OffloadArch");
@@ -292,6 +293,8 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
     if (GPU == OffloadArch::SM_90a)
       Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
+    if (GPU == OffloadArch::SM_100a)
+      Builder.defineMacro("__CUDA_ARCH_FEAT_SM100_ALL", "1");
   }
 }
 
diff --git a/clang/lib/CodeGen/ABIInfo.cpp b/clang/lib/CodeGen/ABIInfo.cpp
index 642bca9e8b76d..cda8a494f6c27 100644
--- a/clang/lib/CodeGen/ABIInfo.cpp
+++ b/clang/lib/CodeGen/ABIInfo.cpp
@@ -236,6 +236,14 @@ void ABIInfo::appendAttributeMangling(StringRef AttrStr,
   }
 }
 
+llvm::FixedVectorType *
+ABIInfo::getOptimalVectorMemoryType(llvm::FixedVectorType *T,
+                                    const LangOptions &Opt) const {
+  if (T->getNumElements() == 3 && !Opt.PreserveVec3Type)
+    return llvm::FixedVectorType::get(T->getElementType(), 4);
+  return T;
+}
+
 // Pin the vtable to this file.
 SwiftABIInfo::~SwiftABIInfo() = default;
 
diff --git a/clang/lib/CodeGen/ABIInfo.h b/clang/lib/CodeGen/ABIInfo.h
index b8a8de57e5b97..213e7879c3162 100644
--- a/clang/lib/CodeGen/ABIInfo.h
+++ b/clang/lib/CodeGen/ABIInfo.h
@@ -20,6 +20,7 @@ class Value;
 class LLVMContext;
 class DataLayout;
 class Type;
+class FixedVectorType;
 } // namespace llvm
 
 namespace clang {
@@ -123,6 +124,13 @@ class ABIInfo {
                                        raw_ostream &Out) const;
   virtual void appendAttributeMangling(StringRef AttrStr,
                                        raw_ostream &Out) const;
+
+  /// Returns the optimal vector memory type based on the given vector type. For
+  /// example, on certain targets, a vector with 3 elements might be promoted to
+  /// one with 4 elements to improve performance.
+  virtual llvm::FixedVectorType *
+  getOptimalVectorMemoryType(llvm::FixedVectorType *T,
+                             const LangOptions &Opt) const;
 };
 
 /// Target specific hooks for defining how a type should be passed or returned
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index f6cb2ad421e90..3adb2a7ad207f 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -723,6 +723,24 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
   case AtomicExpr::AO__scoped_atomic_fetch_nand:
     Op = llvm::AtomicRMWInst::Nand;
     break;
+
+  case AtomicExpr::AO__atomic_test_and_set: {
+    llvm::AtomicRMWInst *RMWI =
+        CGF.emitAtomicRMWInst(llvm::AtomicRMWInst::Xchg, Ptr,
+                              CGF.Builder.getInt8(1), Order, Scope, E);
+    RMWI->setVolatile(E->isVolatile());
+    llvm::Value *Result = CGF.Builder.CreateIsNotNull(RMWI, "tobool");
+    CGF.Builder.CreateStore(Result, Dest);
+    return;
+  }
+
+  case AtomicExpr::AO__atomic_clear: {
+    llvm::StoreInst *Store =
+        CGF.Builder.CreateStore(CGF.Builder.getInt8(0), Ptr);
+    Store->setAtomic(Order, Scope);
+    Store->setVolatile(E->isVolatile());
+    return;
+  }
   }
 
   llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
@@ -878,6 +896,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
   case AtomicExpr::AO__c11_atomic_load:
   case AtomicExpr::AO__opencl_atomic_load:
   case AtomicExpr::AO__hip_atomic_load:
+  case AtomicExpr::AO__atomic_test_and_set:
+  case AtomicExpr::AO__atomic_clear:
     break;
 
   case AtomicExpr::AO__atomic_load:
@@ -1200,6 +1220,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
     case AtomicExpr::AO__opencl_atomic_fetch_max:
     case AtomicExpr::AO__scoped_atomic_fetch_max:
     case AtomicExpr::AO__scoped_atomic_max_fetch:
+    case AtomicExpr::AO__atomic_test_and_set:
+    case AtomicExpr::AO__atomic_clear:
       llvm_unreachable("Integral atomic operations always become atomicrmw!");
     }
 
@@ -1239,7 +1261,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
                  E->getOp() == AtomicExpr::AO__atomic_store ||
                  E->getOp() == AtomicExpr::AO__atomic_store_n ||
                  E->getOp() == AtomicExpr::AO__scoped_atomic_store ||
-                 E->getOp() == AtomicExpr::AO__scoped_atomic_store_n;
+                 E->getOp() == AtomicExpr::AO__scoped_atomic_store_n ||
+                 E->getOp() == AtomicExpr::AO__atomic_clear;
   bool IsLoad = E->getOp() == AtomicExpr::AO__c11_atomic_load ||
                 E->getOp() == AtomicExpr::AO__opencl_atomic_load ||
                 E->getOp() == AtomicExpr::AO__hip_atomic_load ||
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 28d9a981d29e5..f1515347fb816 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -68,6 +68,7 @@
 #include "llvm/TargetParser/RISCVISAInfo.h"
 #include "llvm/TargetParser/RISCVTargetParser.h"
 #include "llvm/TargetParser/X86TargetParser.h"
+#include <numeric>
 #include <optional>
 #include <utility>
 
@@ -5127,147 +5128,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
                     ReturnValueSlot(), Args);
   }
 
-  case Builtin::BI__atomic_test_and_set: {
-    // Look at the argument type to determine whether this is a volatile
-    // operation. The parameter type is always volatile.
-    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
-    bool Volatile =
-        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
-
-    Address Ptr =
-        EmitPointerWithAlignment(E->getArg(0)).withElementType(Int8Ty);
-
-    Value *NewVal = Builder.getInt8(1);
-    Value *Order = EmitScalarExpr(E->getArg(1));
-    if (isa<llvm::ConstantInt>(Order)) {
-      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
-      AtomicRMWInst *Result = nullptr;
-      switch (ord) {
-      case 0:  // memory_order_relaxed
-      default: // invalid order
-        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
-                                         llvm::AtomicOrdering::Monotonic);
-        break;
-      case 1: // memory_order_consume
-      case 2: // memory_order_acquire
-        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
-                                         llvm::AtomicOrdering::Acquire);
-        break;
-      case 3: // memory_order_release
-        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
-                                         llvm::AtomicOrdering::Release);
-        break;
-      case 4: // memory_order_acq_rel
-
-        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
-                                         llvm::AtomicOrdering::AcquireRelease);
-        break;
-      case 5: // memory_order_seq_cst
-        Result = Builder.CreateAtomicRMW(
-            llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
-            llvm::AtomicOrdering::SequentiallyConsistent);
-        break;
-      }
-      Result->setVolatile(Volatile);
-      return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
-    }
-
-    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
-
-    llvm::BasicBlock *BBs[5] = {
-      createBasicBlock("monotonic", CurFn),
-      createBasicBlock("acquire", CurFn),
-      createBasicBlock("release", CurFn),
-      createBasicBlock("acqrel", CurFn),
-      createBasicBlock("seqcst", CurFn)
-    };
-    llvm::AtomicOrdering Orders[5] = {
-        llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
-        llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
-        llvm::AtomicOrdering::SequentiallyConsistent};
-
-    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
-    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
-
-    Builder.SetInsertPoint(ContBB);
-    PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
-
-    for (unsigned i = 0; i < 5; ++i) {
-      Builder.SetInsertPoint(BBs[i]);
-      AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
-                                                   Ptr, NewVal, Orders[i]);
-      RMW->setVolatile(Volatile);
-      Result->addIncoming(RMW, BBs[i]);
-      Builder.CreateBr(ContBB);
-    }
-
-    SI->addCase(Builder.getInt32(0), BBs[0]);
-    SI->addCase(Builder.getInt32(1), BBs[1]);
-    SI->addCase(Builder.getInt32(2), BBs[1]);
-    SI->addCase(Builder.getInt32(3), BBs[2]);
-    SI->addCase(Builder.getInt32(4), BBs[3]);
-    SI->addCase(Builder.getInt32(5), BBs[4]);
-
-    Builder.SetInsertPoint(ContBB);
-    return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
-  }
-
-  case Builtin::BI__atomic_clear: {
-    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
-    bool Volatile =
-        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
-
-    Address Ptr = EmitPointerWithAlignment(E->getArg(0));
-    Ptr = Ptr.withElementType(Int8Ty);
-    Value *NewVal = Builder.getInt8(0);
-    Value *Order = EmitScalarExpr(E->getArg(1));
-    if (isa<llvm::ConstantInt>(Order)) {
-      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
-      StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
-      switch (ord) {
-      case 0:  // memory_order_relaxed
-      default: // invalid order
-        Store->setOrdering(llvm::AtomicOrdering::Monotonic);
-        break;
-      case 3:  // memory_order_release
-        Store->setOrdering(llvm::AtomicOrdering::Release);
-        break;
-      case 5:  // memory_order_seq_cst
-        Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
-        break;
-      }
-      return RValue::get(nullptr);
-    }
-
-    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
-
-    llvm::BasicBlock *BBs[3] = {
-      createBasicBlock("monotonic", CurFn),
-      createBasicBlock("release", CurFn),
-      createBasicBlock("seqcst", CurFn)
-    };
-    llvm::AtomicOrdering Orders[3] = {
-        llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
-        llvm::AtomicOrdering::SequentiallyConsistent};
-
-    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
-    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
-
-    for (unsigned i = 0; i < 3; ++i) {
-      Builder.SetInsertPoint(BBs[i]);
-      StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
-      Store->setOrdering(Orders[i]);
-      Builder.CreateBr(ContBB);
-    }
-
-    SI->addCase(Builder.getInt32(0), BBs[0]);
-    SI->addCase(Builder.getInt32(3), BBs[1]);
-    SI->addCase(Builder.getInt32(5), BBs[2]);
-
-    Builder.SetInsertPoint(ContBB);
-    return RValue::get(nullptr);
-  }
-
   case Builtin::BI__atomic_thread_fence:
   case Builtin::BI__atomic_signal_fence:
   case Builtin::BI__c11_atomic_thread_fence:
@@ -19332,9 +19192,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
     assert(T0->isVectorTy() && T1->isVectorTy() &&
            "Dot product of vector and scalar is not supported.");
 
-    auto *VecTy0 = E->getArg(0)->getType()->getAs<VectorType>();
+    auto *VecTy0 = E->getArg(0)->getType()->castAs<VectorType>();
     [[maybe_unused]] auto *VecTy1 =
-        E->getArg(1)->getType()->getAs<VectorType>();
+        E->getArg(1)->getType()->castAs<VectorType>();
 
     assert(VecTy0->getElementType() == VecTy1->getElementType() &&
            "Dot product of vectors need the same element types.");
@@ -19427,7 +19287,7 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: {
     llvm::Type *Xty = Op0->getType();
     llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext());
     if (Xty->isVectorTy()) {
-      auto *XVecTy = E->getArg(0)->getType()->getAs<VectorType>();
+      auto *XVecTy = E->getArg(0)->getType()->castAs<VectorType>();
       retType = llvm::VectorType::get(
           retType, ElementCount::getFixed(XVecTy->getNumElements()));
     }
@@ -19613,7 +19473,7 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: {
     llvm::Type *Xty = Op0->getType();
     llvm::Type *retType = llvm::Type::getInt32Ty(this->getLLVMContext());
     if (Xty->isVectorTy()) {
-      auto *XVecTy = Arg0->getType()->getAs<VectorType>();
+      auto *XVecTy = Arg0->getType()->castAs<VectorType>();
       retType = llvm::VectorType::get(
           retType, ElementCount::getFixed(XVecTy->getNumElements()));
     }
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 9a9a8c7f6eae0..054f8d1eadb8c 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -46,6 +46,7 @@
 #include "llvm/Support/xxhash.h"
 #include "llvm/Transforms/Utils/SanitizerStats.h"
 
+#include <numeric>
 #include <optional>
 #include <string>
 
@@ -2002,20 +2003,19 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
       return EmitFromMemory(V, Ty);
     }
 
-    // Handle vectors of size 3 like size 4 for better performance.
-    const llvm::Type *EltTy = Addr.getElementType();
-    const auto *VTy = cast<llvm::FixedVectorType>(EltTy);
-
-    if (!CGM.getCodeGenOpts().PreserveVec3Type && VTy->getNumElements() == 3) {
-
-      llvm::VectorType *vec4Ty =
-          llvm::FixedVectorType::get(VTy->getElementType(), 4);
-      Address Cast = Addr.withElementType(vec4Ty);
-      // Now load value.
-      llvm::Value *V = Builder.CreateLoad(Cast, Volatile, "loadVec4");
-
-      // Shuffle vector to get vec3.
-      V = Builder.CreateShuffleVector(V, ArrayRef<int>{0, 1, 2}, "extractVec");
+    // Handles vectors of sizes that are likely to be expanded to a larger size
+    // to optimize performance.
+    auto *VTy = cast<llvm::FixedVectorType>(Addr.getElementType());
+    auto *NewVecTy =
+        CGM.getABIInfo().getOptimalVectorMemoryType(VTy, getLangOpts());
+
+    if (VTy != NewVecTy) {
+      Address Cast = Addr.withElementType(NewVecTy);
+      llvm::Value *V = Builder.CreateLoad(Cast, Volatile, "loadVecN");
+      unsigned OldNumElements = VTy->getNumElements();
+      SmallVector<int, 16> Mask(OldNumElements);
+      std::iota(Mask.begin(), Mask.end(), 0);
+      V = Builder.CreateShuffleVector(V, Mask, "extractVec");
       return EmitFromMemory(V, Ty);
     }
   }
@@ -2145,21 +2145,21 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
       Addr = Addr.withPointer(Builder.CreateThreadLocalAddress(GV),
                               NotKnownNonNull);
 
+  // Handles vectors of sizes that are likely to be expanded to a larger size
+  // to optimize performance.
   llvm::Type *SrcTy = Value->getType();
   if (const auto *ClangVecTy = Ty->getAs<VectorType>()) {
-    auto *VecTy = dyn_cast<llvm::FixedVectorType>(SrcTy);
-    if (!CGM.getCodeGenOpts().PreserveVec3Type) {
-      // Handle vec3 special.
-      if (VecTy && !ClangVecTy->isExtVectorBoolType() &&
-          cast<llvm::FixedVectorType>(VecTy)->getNumElements() == 3) {
-        // Our source is a vec3, do a shuffle vector to make it a vec4.
-        Value = Builder.CreateShuffleVector(Value, ArrayRef<int>{0, 1, 2, -1},
-                                            "extractVec");
-        SrcTy = llvm::FixedVectorType::get(VecTy->getElementType(), 4);
+    if (auto *VecTy = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
+      auto *NewVecTy =
+          CGM.getABIInfo().getOptimalVectorMemoryType(VecTy, getLangOpts());
+      if (!ClangVecTy->isExtVectorBoolType() && VecTy != NewVecTy) {
+        SmallVector<int, 16> Mask(NewVecTy->getNumElements(), -1);
+        std::iota(Mask.begin(), Mask.begin() + VecTy->getNumElements(), 0);
+        Value = Builder.CreateShuffleVector(Value, Mask, "extractVec");
+        SrcTy = NewVecTy;
       }
-      if (Addr.getElementType() != SrcTy) {
+      if (Addr.getElementType() != SrcTy)
         Addr = Addr.withElementType(SrcTy);
-      }
     }
   }
 
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 87c3635ed3f70..c13928f61a748 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -2277,6 +2277,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
       case OffloadArch::SM_90:
       case OffloadArch::SM_90a:
       case OffloadArch::SM_100:
+      case OffloadArch::SM_100a:
       case OffloadArch::GFX600:
       case OffloadArch::GFX601:
       case OffloadArch::GFX602:
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index fa07e68c55835..788eac5f28231 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -52,6 +52,17 @@ class AMDGPUABIInfo final : public DefaultABIInfo {
   void computeInfo(CGFunctionInfo &FI) const override;
   RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                    AggValueSlot Slot) const override;
+
+  llvm::FixedVectorType *
+  getOptimalVectorMemoryType(llvm::FixedVectorType *T,
+                             const LangOptions &Opt) const override {
+    // We have legal instructions for 96-bit so 3x32 can be supported.
+    // FIXME: This check should be a subtarget feature as technically SI doesn't
+    // support it.
+    if (T->getNumElements() == 3 && getDataLayout().getTypeSizeInBits(T) == 96)
+      return T;
+    return DefaultABIInfo::getOptimalVectorMemoryType(T, Opt);
+  }
 };
 
 bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
index bbd9397aa2378..c9b45ce58bd4c 100644
--- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
@@ -181,10 +181,6 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D,
     }
   }
 
-  // Select the `ual` feature determined by -m[no-]strict-align.
-  AddTargetFeature(Args, Features, options::OPT_mno_strict_align,
-                   options::OPT_mstrict_align, "ual");
-
   // Accept but warn about these TargetSpecific options.
   if (Arg *A = Args.getLastArgNoClaim(options::OPT_mabi_EQ))
     A->ignoreTargetSpecific();
@@ -257,50 +253,20 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D,
       Features.push_back("-lasx");
   }
 
-  // Select frecipe feature determined by -m[no-]frecipe.
-  if (const Arg *A =
-          Args.getLastArg(options::OPT_mfrecipe, options::OPT_mno_frecipe)) {
-    if (A->getOption().matches(options::OPT_mfrecipe))
-      Features.push_back("+frecipe");
-    else
-      Features.push_back("-frecipe");
-  }
-
-  // Select lam-bh feature determined by -m[no-]lam-bh.
-  if (const Arg *A =
-          Args.getLastArg(options::OPT_mlam_bh, options::OPT_mno_lam_bh)) {
-    if (A->getOption().matches(options::OPT_mlam_bh))
-      Features.push_back("+lam-bh");
-    else
-      Features.push_back("-lam-bh");
-  }
-
-  // Select lamcas feature determined by -m[no-]lamcas.
-  if (const Arg *A =
-          Args.getLastArg(options::OPT_mlamcas, options::OPT_mno_lamcas)) {
-    if (A->getOption().matches(options::OPT_mlamcas))
-      Features.push_back("+lamcas");
-    else
-      Features.push_back("-lamcas");
-  }
-
-  // Select ld-seq-sa feature determined by -m[no-]ld-seq-sa.
-  if (const Arg *A = Args.getLastArg(options::OPT_mld_seq_sa,
-                                     options::OPT_mno_ld_seq_sa)) {
-    if (A->getOption().matches(options::OPT_mld_seq_sa))
-      Features.push_back("+ld-seq-sa");
-    else
-      Features.push_back("-ld-seq-sa");
-  }
-
-  // Select div32 feature determined by -m[no-]div32.
-  if (const Arg *A =
-          Args.getLastArg(options::OPT_mdiv32, options::OPT_mno_div32)) {
-    if (A->getOption().matches(options::OPT_mdiv32))
-      Features.push_back("+div32");
-    else
-      Features.push_back("-div32");
-  }
+  // Select the `ual` feature determined by -m[no-]strict-align, then the
+  // remaining features determined by their own -m[no-]<feature> options.
+  AddTargetFeature(Args, Features, options::OPT_mno_strict_align,
+                   options::OPT_mstrict_align, "ual");
+  AddTargetFeature(Args, Features, options::OPT_mfrecipe,
+                   options::OPT_mno_frecipe, "frecipe");
+  AddTargetFeature(Args, Features, options::OPT_mlam_bh,
+                   options::OPT_mno_lam_bh, "lam-bh");
+  AddTargetFeature(Args, Features, options::OPT_mlamcas,
+                   options::OPT_mno_lamcas, "lamcas");
+  AddTargetFeature(Args, Features, options::OPT_mld_seq_sa,
+                   options::OPT_mno_ld_seq_sa, "ld-seq-sa");
+  AddTargetFeature(Args, Features, options::OPT_mdiv32,
+                   options::OPT_mno_div32, "div32");
 }
 
 std::string loongarch::postProcessTargetCPUString(const std::string &CPU,
diff --git a/clang/lib/Driver/ToolChains/Arch/Mips.cpp b/clang/lib/Driver/ToolChains/Arch/Mips.cpp
index 6a2f7936fca39..ca0745fc2b32d 100644
--- a/clang/lib/Driver/ToolChains/Arch/Mips.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/Mips.cpp
@@ -366,9 +366,6 @@ void mips::getMIPSTargetFeatures(const Driver &D, const llvm::Triple &Triple,
   } else if (mips::shouldUseFPXX(Args, Triple, CPUName, ABIName, FloatABI)) {
     Features.push_back("+fpxx");
     Features.push_back("+nooddspreg");
-  } else if (mips::isFP64ADefault(Triple, CPUName)) {
-    Features.push_back("+fp64");
-    Features.push_back("+nooddspreg");
   } else if (Arg *A = Args.getLastArg(options::OPT_mmsa)) {
     if (A->getOption().matches(options::OPT_mmsa))
       Features.push_back("+fp64");
@@ -465,16 +462,6 @@ bool mips::isNaN2008(const Driver &D, const ArgList &Args,
       .Default(false);
 }
 
-bool mips::isFP64ADefault(const llvm::Triple &Triple, StringRef CPUName) {
-  if (!Triple.isAndroid())
-    return false;
-
-  // Android MIPS32R6 defaults to FP64A.
-  return llvm::StringSwitch<bool>(CPUName)
-      .Case("mips32r6", true)
-      .Default(false);
-}
-
 bool mips::isFPXXDefault(const llvm::Triple &Triple, StringRef CPUName,
                          StringRef ABIName, mips::FloatABI FloatABI) {
   if (ABIName != "32")
diff --git a/clang/lib/Driver/ToolChains/Arch/Mips.h b/clang/lib/Driver/ToolChains/Arch/Mips.h
index 674c21744b523..058e82b09831a 100644
--- a/clang/lib/Driver/ToolChains/Arch/Mips.h
+++ b/clang/lib/Driver/ToolChains/Arch/Mips.h
@@ -46,7 +46,6 @@ bool hasMipsAbiArg(const llvm::opt::ArgList &Args, const char *Value);
 bool isUCLibc(const llvm::opt::ArgList &Args);
 bool isNaN2008(const Driver &D, const llvm::opt::ArgList &Args,
                const llvm::Triple &Triple);
-bool isFP64ADefault(const llvm::Triple &Triple, StringRef CPUName);
 bool isFPXXDefault(const llvm::Triple &Triple, StringRef CPUName,
                    StringRef ABIName, mips::FloatABI FloatABI);
 bool shouldUseFPXX(const llvm::opt::ArgList &Args, const llvm::Triple &Triple,
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 8967115bcc73d..d4099216c81ba 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -89,6 +89,8 @@ CudaVersion getCudaVersion(uint32_t raw_version) {
     return CudaVersion::CUDA_125;
   if (raw_version < 12070)
     return CudaVersion::CUDA_126;
+  if (raw_version < 12090)
+    return CudaVersion::CUDA_128;
   return CudaVersion::NEW;
 }
 
@@ -682,6 +684,7 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
   case CudaVersion::CUDA_##CUDA_VER:                                           \
     PtxFeature = "+ptx" #PTX_VER;                                              \
     break;
+    CASE_CUDA_VERSION(128, 87);
     CASE_CUDA_VERSION(126, 85);
     CASE_CUDA_VERSION(125, 85);
     CASE_CUDA_VERSION(124, 84);
diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp
index 84ef8199de049..55c55bad73934 100644
--- a/clang/lib/Driver/ToolChains/Darwin.cpp
+++ b/clang/lib/Driver/ToolChains/Darwin.cpp
@@ -2164,7 +2164,8 @@ inferDeploymentTargetFromArch(DerivedArgList &Args, const Darwin &Toolchain,
   StringRef MachOArchName = Toolchain.getMachOArchName(Args);
   if (MachOArchName == "arm64" || MachOArchName == "arm64e")
     OSTy = llvm::Triple::MacOSX;
-  else if (MachOArchName == "armv7" || MachOArchName == "armv7s")
+  else if (MachOArchName == "armv7" || MachOArchName == "armv7s" ||
+           MachOArchName == "armv6")
     OSTy = llvm::Triple::IOS;
   else if (MachOArchName == "armv7k" || MachOArchName == "arm64_32")
     OSTy = llvm::Triple::WatchOS;
diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp
index e5db1b2f1550b..6dfa94bf2123b 100644
--- a/clang/lib/Driver/ToolChains/Gnu.cpp
+++ b/clang/lib/Driver/ToolChains/Gnu.cpp
@@ -1146,53 +1146,6 @@ static bool findMipsCsMultilibs(const Driver &D,
   return false;
 }
 
-static bool findMipsAndroidMultilibs(const Driver &D,
-                                     llvm::vfs::FileSystem &VFS, StringRef Path,
-                                     const Multilib::flags_list &Flags,
-                                     FilterNonExistent &NonExistent,
-                                     DetectedMultilibs &Result) {
-
-  MultilibSet AndroidMipsMultilibs =
-      MultilibSetBuilder()
-          .Maybe(MultilibBuilder("/mips-r2", {}, {}).flag("-march=mips32r2"))
-          .Maybe(MultilibBuilder("/mips-r6", {}, {}).flag("-march=mips32r6"))
-          .makeMultilibSet()
-          .FilterOut(NonExistent);
-
-  MultilibSet AndroidMipselMultilibs =
-      MultilibSetBuilder()
-          .Either(MultilibBuilder().flag("-march=mips32"),
-                  MultilibBuilder("/mips-r2", "", "/mips-r2")
-                      .flag("-march=mips32r2"),
-                  MultilibBuilder("/mips-r6", "", "/mips-r6")
-                      .flag("-march=mips32r6"))
-          .makeMultilibSet()
-          .FilterOut(NonExistent);
-
-  MultilibSet AndroidMips64elMultilibs =
-      MultilibSetBuilder()
-          .Either(MultilibBuilder().flag("-march=mips64r6"),
-                  MultilibBuilder("/32/mips-r1", "", "/mips-r1")
-                      .flag("-march=mips32"),
-                  MultilibBuilder("/32/mips-r2", "", "/mips-r2")
-                      .flag("-march=mips32r2"),
-                  MultilibBuilder("/32/mips-r6", "", "/mips-r6")
-                      .flag("-march=mips32r6"))
-          .makeMultilibSet()
-          .FilterOut(NonExistent);
-
-  MultilibSet *MS = &AndroidMipsMultilibs;
-  if (VFS.exists(Path + "/mips-r6"))
-    MS = &AndroidMipselMultilibs;
-  else if (VFS.exists(Path + "/32"))
-    MS = &AndroidMips64elMultilibs;
-  if (MS->select(D, Flags, Result.SelectedMultilibs)) {
-    Result.Multilibs = *MS;
-    return true;
-  }
-  return false;
-}
-
 static bool findMipsMuslMultilibs(const Driver &D,
                                   const Multilib::flags_list &Flags,
                                   FilterNonExistent &NonExistent,
@@ -1560,10 +1513,6 @@ bool clang::driver::findMIPSMultilibs(const Driver &D,
   addMultilibFlag(isMipsEL(TargetArch), "-EL", Flags);
   addMultilibFlag(!isMipsEL(TargetArch), "-EB", Flags);
 
-  if (TargetTriple.isAndroid())
-    return findMipsAndroidMultilibs(D, D.getVFS(), Path, Flags, NonExistent,
-                                    Result);
-
   if (TargetTriple.getVendor() == llvm::Triple::MipsTechnologies &&
       TargetTriple.getOS() == llvm::Triple::Linux &&
       TargetTriple.getEnvironment() == llvm::Triple::UnknownEnvironment)
diff --git a/clang/lib/Driver/ToolChains/HIPUtility.cpp b/clang/lib/Driver/ToolChains/HIPUtility.cpp
index bfb6ec7a01058..dfe9acc1ec795 100644
--- a/clang/lib/Driver/ToolChains/HIPUtility.cpp
+++ b/clang/lib/Driver/ToolChains/HIPUtility.cpp
@@ -466,11 +466,11 @@ void HIP::constructGenerateObjFileFromHIPFatBinary(
 
   Objf << ObjBuffer;
 
-  ArgStringList McArgs{"-target", Args.MakeArgString(HostTriple.normalize()),
+  ArgStringList ClangArgs{"-target", Args.MakeArgString(HostTriple.normalize()),
                        "-o",      Output.getFilename(),
                        "-x",      "assembler",
                        ObjinFile, "-c"};
   C.addCommand(std::make_unique<Command>(JA, T, ResponseFileSupport::None(),
-                                         D.getClangProgramPath(), McArgs,
+                                         D.getClangProgramPath(), ClangArgs,
                                          Inputs, Output, D.getPrependArg()));
 }
diff --git a/clang/lib/Driver/ToolChains/PS4CPU.cpp b/clang/lib/Driver/ToolChains/PS4CPU.cpp
index fd4c2f9bf68cd..e1187ce48c3e4 100644
--- a/clang/lib/Driver/ToolChains/PS4CPU.cpp
+++ b/clang/lib/Driver/ToolChains/PS4CPU.cpp
@@ -490,7 +490,7 @@ toolchains::PS4PS5Base::PS4PS5Base(const Driver &D, const llvm::Triple &Triple,
 
   bool Linking = !Args.hasArg(options::OPT_E, options::OPT_c, options::OPT_S,
                               options::OPT_emit_ast);
-  if (!CustomSysroot && Linking) {
+  if (Linking) {
     SmallString<128> Dir(SDKLibraryRootDir);
     llvm::sys::path::append(Dir, "target/lib");
     if (CheckSDKPartExists(Dir, "system libraries"))
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index 6826fa76662cf..c25d9bf7c2251 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -1040,7 +1040,6 @@ template <> struct MappingTraits<FormatStyle> {
                    Style.EmptyLineBeforeAccessModifier);
     IO.mapOptional("ExperimentalAutoDetectBinPacking",
                    Style.ExperimentalAutoDetectBinPacking);
-    IO.mapOptional("ExportBlockIndentation", Style.ExportBlockIndentation);
     IO.mapOptional("FixNamespaceComments", Style.FixNamespaceComments);
     IO.mapOptional("ForEachMacros", Style.ForEachMacros);
     IO.mapOptional("IfMacros", Style.IfMacros);
@@ -1052,6 +1051,7 @@ template <> struct MappingTraits<FormatStyle> {
     IO.mapOptional("IndentAccessModifiers", Style.IndentAccessModifiers);
     IO.mapOptional("IndentCaseBlocks", Style.IndentCaseBlocks);
     IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
+    IO.mapOptional("IndentExportBlock", Style.IndentExportBlock);
     IO.mapOptional("IndentExternBlock", Style.IndentExternBlock);
     IO.mapOptional("IndentGotoLabels", Style.IndentGotoLabels);
     IO.mapOptional("IndentPPDirectives", Style.IndentPPDirectives);
@@ -1551,7 +1551,6 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) {
   LLVMStyle.EmptyLineAfterAccessModifier = FormatStyle::ELAAMS_Never;
   LLVMStyle.EmptyLineBeforeAccessModifier = FormatStyle::ELBAMS_LogicalBlock;
   LLVMStyle.ExperimentalAutoDetectBinPacking = false;
-  LLVMStyle.ExportBlockIndentation = true;
   LLVMStyle.FixNamespaceComments = true;
   LLVMStyle.ForEachMacros.push_back("foreach");
   LLVMStyle.ForEachMacros.push_back("Q_FOREACH");
@@ -1567,6 +1566,7 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) {
   LLVMStyle.IndentAccessModifiers = false;
   LLVMStyle.IndentCaseBlocks = false;
   LLVMStyle.IndentCaseLabels = false;
+  LLVMStyle.IndentExportBlock = true;
   LLVMStyle.IndentExternBlock = FormatStyle::IEBS_AfterExternBlock;
   LLVMStyle.IndentGotoLabels = true;
   LLVMStyle.IndentPPDirectives = FormatStyle::PPDIS_None;
diff --git a/clang/lib/Format/QualifierAlignmentFixer.cpp b/clang/lib/Format/QualifierAlignmentFixer.cpp
index 530b2dd538cee..21fb5074b4928 100644
--- a/clang/lib/Format/QualifierAlignmentFixer.cpp
+++ b/clang/lib/Format/QualifierAlignmentFixer.cpp
@@ -386,7 +386,8 @@ const FormatToken *LeftRightQualifierAlignmentFixer::analyzeLeft(
   // For left qualifiers preceeded by nothing, a template declaration, or *,&,&&
   // we only perform sorting.
   if (!TypeToken || TypeToken->isPointerOrReference() ||
-      TypeToken->ClosesRequiresClause || TypeToken->ClosesTemplateDeclaration) {
+      TypeToken->ClosesRequiresClause || TypeToken->ClosesTemplateDeclaration ||
+      TypeToken->is(tok::r_square)) {
 
     // Don't sort past a non-configured qualifier token.
     const FormatToken *FirstQual = Tok;
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 834693e2ecf0c..4258329136348 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -3167,8 +3167,7 @@ void UnwrappedLineParser::parseNamespace() {
 }
 
 void UnwrappedLineParser::parseCppExportBlock() {
-  parseNamespaceOrExportBlock(/*AddLevels=*/Style.ExportBlockIndentation ? 1
-                                                                         : 0);
+  parseNamespaceOrExportBlock(/*AddLevels=*/Style.IndentExportBlock ? 1 : 0);
 }
 
 void UnwrappedLineParser::parseNew() {
diff --git a/clang/lib/Headers/avx10_2_512minmaxintrin.h b/clang/lib/Headers/avx10_2_512minmaxintrin.h
index e175365d11df8..fbc7fbadbc6b2 100644
--- a/clang/lib/Headers/avx10_2_512minmaxintrin.h
+++ b/clang/lib/Headers/avx10_2_512minmaxintrin.h
@@ -14,22 +14,22 @@
 #ifndef __AVX10_2_512MINMAXINTRIN_H
 #define __AVX10_2_512MINMAXINTRIN_H
 
-#define _mm512_minmaxne_pbh(A, B, C)                                           \
-  ((__m512bh)__builtin_ia32_vminmaxnepbf16512(                                 \
-      (__v32bf)(__m512bh)(A), (__v32bf)(__m512bh)(A), (int)(C)))
+#define _mm512_minmax_pbh(A, B, C)                                             \
+  ((__m512bh)__builtin_ia32_vminmaxbf16512((__v32bf)(__m512bh)(A),             \
+                                           (__v32bf)(__m512bh)(B), (int)(C)))
 
-#define _mm512_mask_minmaxne_pbh(W, U, A, B, C)                                \
+#define _mm512_mask_minmax_pbh(W, U, A, B, C)                                  \
   ((__m512bh)__builtin_ia32_selectpbf_512(                                     \
       (__mmask32)(U),                                                          \
-      (__v32bf)_mm512_minmaxne_pbh((__v32bf)(__m512bh)(A),                     \
-                                   (__v32bf)(__m512bh)(B), (int)(C)),          \
+      (__v32bf)_mm512_minmax_pbh((__v32bf)(__m512bh)(A),                       \
+                                 (__v32bf)(__m512bh)(B), (int)(C)),            \
       (__v32bf)(__m512bh)(W)))
 
-#define _mm512_maskz_minmaxne_pbh(U, A, B, C)                                  \
+#define _mm512_maskz_minmax_pbh(U, A, B, C)                                    \
   ((__m512bh)__builtin_ia32_selectpbf_512(                                     \
       (__mmask32)(U),                                                          \
-      (__v32bf)_mm512_minmaxne_pbh((__v32bf)(__m512bh)(A),                     \
-                                   (__v32bf)(__m512bh)(B), (int)(C)),          \
+      (__v32bf)_mm512_minmax_pbh((__v32bf)(__m512bh)(A),                       \
+                                 (__v32bf)(__m512bh)(B), (int)(C)),            \
       (__v32bf) __builtin_bit_cast(__m512bh, _mm512_setzero_ps())))
 
 #define _mm512_minmax_pd(A, B, C)                                              \
diff --git a/clang/lib/Headers/avx10_2minmaxintrin.h b/clang/lib/Headers/avx10_2minmaxintrin.h
index a9367e7424658..8164d49d89f1f 100644
--- a/clang/lib/Headers/avx10_2minmaxintrin.h
+++ b/clang/lib/Headers/avx10_2minmaxintrin.h
@@ -14,40 +14,40 @@
 #ifndef __AVX10_2MINMAXINTRIN_H
 #define __AVX10_2MINMAXINTRIN_H
 
-#define _mm_minmaxne_pbh(A, B, C)                                              \
-  ((__m128bh)__builtin_ia32_vminmaxnepbf16128(                                 \
-      (__m128bh)(__v8bf)(A), (__m128bh)(__v8bf)(B), (int)(C)))
+#define _mm_minmax_pbh(A, B, C)                                                \
+  ((__m128bh)__builtin_ia32_vminmaxbf16128((__m128bh)(__v8bf)(A),              \
+                                           (__m128bh)(__v8bf)(B), (int)(C)))
 
-#define _mm_mask_minmaxne_pbh(W, U, A, B, C)                                   \
+#define _mm_mask_minmax_pbh(W, U, A, B, C)                                     \
   ((__m128bh)__builtin_ia32_selectpbf_128(                                     \
       (__mmask8)(U),                                                           \
-      (__v8bf)_mm_minmaxne_pbh((__m128bh)(__v8bf)(A), (__m128bh)(__v8bf)(B),   \
-                               (int)(C)),                                      \
+      (__v8bf)_mm_minmax_pbh((__m128bh)(__v8bf)(A), (__m128bh)(__v8bf)(B),     \
+                             (int)(C)),                                        \
       (__v8bf)(W)))
 
-#define _mm_maskz_minmaxne_pbh(U, A, B, C)                                     \
+#define _mm_maskz_minmax_pbh(U, A, B, C)                                       \
   ((__m128bh)__builtin_ia32_selectpbf_128(                                     \
       (__mmask8)(U),                                                           \
-      (__v8bf)_mm_minmaxne_pbh((__m128bh)(__v8bf)(A), (__m128bh)(__v8bf)(B),   \
-                               (int)(C)),                                      \
+      (__v8bf)_mm_minmax_pbh((__m128bh)(__v8bf)(A), (__m128bh)(__v8bf)(B),     \
+                             (int)(C)),                                        \
       (__v8bf) __builtin_bit_cast(__m128bh, _mm_setzero_ps())))
 
-#define _mm256_minmaxne_pbh(A, B, C)                                           \
-  ((__m256bh)__builtin_ia32_vminmaxnepbf16256(                                 \
-      (__m256bh)(__v16bf)(A), (__m256bh)(__v16bf)(B), (int)(C)))
+#define _mm256_minmax_pbh(A, B, C)                                             \
+  ((__m256bh)__builtin_ia32_vminmaxbf16256((__m256bh)(__v16bf)(A),             \
+                                           (__m256bh)(__v16bf)(B), (int)(C)))
 
-#define _mm256_mask_minmaxne_pbh(W, U, A, B, C)                                \
+#define _mm256_mask_minmax_pbh(W, U, A, B, C)                                  \
   ((__m256bh)__builtin_ia32_selectpbf_256(                                     \
       (__mmask16)(U),                                                          \
-      (__v16bf)_mm256_minmaxne_pbh((__m256bh)(__v16bf)(A),                     \
-                                   (__m256bh)(__v16bf)(B), (int)(C)),          \
+      (__v16bf)_mm256_minmax_pbh((__m256bh)(__v16bf)(A),                       \
+                                 (__m256bh)(__v16bf)(B), (int)(C)),            \
       (__v16bf)(W)))
 
-#define _mm256_maskz_minmaxne_pbh(U, A, B, C)                                  \
+#define _mm256_maskz_minmax_pbh(U, A, B, C)                                    \
   ((__m256bh)__builtin_ia32_selectpbf_256(                                     \
       (__mmask16)(U),                                                          \
-      (__v16bf)_mm256_minmaxne_pbh((__m256bh)(__v16bf)(A),                     \
-                                   (__m256bh)(__v16bf)(B), (int)(C)),          \
+      (__v16bf)_mm256_minmax_pbh((__m256bh)(__v16bf)(A),                       \
+                                 (__m256bh)(__v16bf)(B), (int)(C)),            \
       (__v16bf) __builtin_bit_cast(__m256bh, _mm256_setzero_ps())))
 
 #define _mm_minmax_pd(A, B, C)                                                 \
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 115b6c1606a02..087c6f13aea66 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -1352,6 +1352,27 @@ std::optional<Token> Lexer::findNextToken(SourceLocation Loc,
   return Tok;
 }
 
+std::optional<Token> Lexer::findPreviousToken(SourceLocation Loc,
+                                              const SourceManager &SM,
+                                              const LangOptions &LangOpts,
+                                              bool IncludeComments) {
+  // Step backwards until a token is lexed or the file start is reached.
+  const SourceLocation FileBegin = SM.getLocForStartOfFile(SM.getFileID(Loc));
+  while (Loc != FileBegin) {
+    Loc = Loc.getLocWithOffset(-1);
+    if (Loc.isInvalid())
+      break;
+    Loc = GetBeginningOfToken(Loc, SM, LangOpts);
+    Token Prev;
+    // A true result means no token could be lexed here; keep going back.
+    if (getRawToken(Loc, Prev, SM, LangOpts))
+      continue;
+    // Comments are only reported when the caller asked for them.
+    if (IncludeComments || !Prev.is(tok::comment))
+      return Prev;
+  }
+  return std::nullopt;
+}
+
 /// Checks that the given token is the first token that occurs after the
 /// given location (this excludes comments and whitespace). Returns the location
 /// immediately after the specified token. If the token is not found or the
diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index 8dd72db8f5b4a..aa8b3870a188c 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -248,6 +248,25 @@ ExprResult Parser::ParseArrayBoundExpression() {
   // If we parse the bound of a VLA... we parse a non-constant
   // constant-expression!
   Actions.ExprEvalContexts.back().InConditionallyConstantEvaluateContext = true;
+  // For a VLA type inside an unevaluated operator like:
+  //
+  //   sizeof(typeof(*(int (*)[N])array))
+  //
+  // N and array are supposed to be ODR-used.
+  // Initially when encountering `array`, it is deemed unevaluated and non-ODR
+  // used because that occurs before parsing the type cast. Therefore we use
+  // Sema::TransformToPotentiallyEvaluated() to rebuild the expression to ensure
+  // it's actually ODR-used.
+  //
+  // However, in other unevaluated contexts as in constraint substitution, it
+  // would end up rebuilding the type twice which is unnecessary. So we push up
+  // a flag to help distinguish these cases.
+  for (auto Iter = Actions.ExprEvalContexts.rbegin() + 1;
+       Iter != Actions.ExprEvalContexts.rend(); ++Iter) {
+    if (!Iter->isUnevaluated())
+      break;
+    Iter->InConditionallyConstantEvaluateContext = true;
+  }
   return ParseConstantExpressionInExprEvalContext(NotTypeCast);
 }
 
diff --git a/clang/lib/Sema/CheckExprLifetime.cpp b/clang/lib/Sema/CheckExprLifetime.cpp
index 27e6b5b2cb393..8963cad86dbca 100644
--- a/clang/lib/Sema/CheckExprLifetime.cpp
+++ b/clang/lib/Sema/CheckExprLifetime.cpp
@@ -200,6 +200,7 @@ struct IndirectLocalPathEntry {
     LifetimeBoundCall,
     TemporaryCopy,
     LambdaCaptureInit,
+    MemberExpr,
     GslReferenceInit,
     GslPointerInit,
     GslPointerAssignment,
@@ -593,19 +594,6 @@ static void visitFunctionCallArguments(IndirectLocalPath &Path, Expr *Call,
     Path.pop_back();
   };
   auto VisitGSLPointerArg = [&](const FunctionDecl *Callee, Expr *Arg) {
-    // We are not interested in the temporary base objects of gsl Pointers:
-    //   Temp().ptr; // Here ptr might not dangle.
-    if (isa<MemberExpr>(Arg->IgnoreImpCasts()))
-      return;
-    // Avoid false positives when the object is constructed from a conditional
-    // operator argument. A common case is:
-    //   // 'ptr' might not be owned by the Owner object.
-    //   std::string_view s = cond() ? Owner().ptr : sv;
-    if (const auto *Cond =
-            dyn_cast<AbstractConditionalOperator>(Arg->IgnoreImpCasts());
-        Cond && isPointerLikeType(Cond->getType()))
-      return;
-
     auto ReturnType = Callee->getReturnType();
 
     // Once we initialized a value with a non gsl-owner reference, it can no
@@ -726,6 +714,9 @@ static void visitLocalsRetainedByReferenceBinding(IndirectLocalPath &Path,
         Init = ILE->getInit(0);
     }
 
+    if (MemberExpr *ME = dyn_cast<MemberExpr>(Init->IgnoreImpCasts()))
+      Path.push_back(
+          {IndirectLocalPathEntry::MemberExpr, ME, ME->getMemberDecl()});
     // Step over any subobject adjustments; we may have a materialized
     // temporary inside them.
     Init = const_cast<Expr *>(Init->skipRValueSubobjectAdjustments());
@@ -1117,10 +1108,12 @@ enum PathLifetimeKind {
 static PathLifetimeKind
 shouldLifetimeExtendThroughPath(const IndirectLocalPath &Path) {
   for (auto Elem : Path) {
-    if (Elem.Kind == IndirectLocalPathEntry::DefaultInit)
-      return PathLifetimeKind::Extend;
-    if (Elem.Kind != IndirectLocalPathEntry::LambdaCaptureInit)
-      return PathLifetimeKind::NoExtend;
+    if (Elem.Kind == IndirectLocalPathEntry::MemberExpr ||
+        Elem.Kind == IndirectLocalPathEntry::LambdaCaptureInit)
+      continue;
+    return Elem.Kind == IndirectLocalPathEntry::DefaultInit
+               ? PathLifetimeKind::Extend
+               : PathLifetimeKind::NoExtend;
   }
   return PathLifetimeKind::Extend;
 }
@@ -1138,6 +1131,7 @@ static SourceRange nextPathEntryRange(const IndirectLocalPath &Path, unsigned I,
     case IndirectLocalPathEntry::GslPointerInit:
     case IndirectLocalPathEntry::GslPointerAssignment:
     case IndirectLocalPathEntry::ParenAggInit:
+    case IndirectLocalPathEntry::MemberExpr:
       // These exist primarily to mark the path as not permitting or
       // supporting lifetime extension.
       break;
@@ -1167,6 +1161,7 @@ static bool pathOnlyHandlesGslPointer(const IndirectLocalPath &Path) {
     case IndirectLocalPathEntry::VarInit:
     case IndirectLocalPathEntry::AddressOf:
     case IndirectLocalPathEntry::LifetimeBoundCall:
+    case IndirectLocalPathEntry::MemberExpr:
       continue;
     case IndirectLocalPathEntry::GslPointerInit:
     case IndirectLocalPathEntry::GslReferenceInit:
@@ -1193,13 +1188,34 @@ enum AnalysisResult {
 // Analyze cases where a GSLPointer is initialized or assigned from a
 // temporary owner object.
 static AnalysisResult analyzePathForGSLPointer(const IndirectLocalPath &Path,
-                                               Local L) {
+                                               Local L, LifetimeKind LK) {
   if (!pathOnlyHandlesGslPointer(Path))
     return NotGSLPointer;
 
   // At this point, Path represents a series of operations involving a
   // GSLPointer, either in the process of initialization or assignment.
 
+  // Handle temporary base objects of member accesses, e.g. Temp().field.
+  for (const auto &E : Path) {
+    if (E.Kind == IndirectLocalPathEntry::MemberExpr) {
+      // Avoid interfering with the local base object.
+      if (pathContainsInit(Path))
+        return Abandon;
+
+      // The temporary base object of a gsl Pointer field is not interesting:
+      //   auto p1 = Temp().ptr; // Here p1 might not dangle.
+      // However, we want to diagnose non-reference gsl Owner fields:
+      //   auto p2 = Temp().owner; // Here p2 is dangling.
+      if (const auto *FD = llvm::dyn_cast_or_null<FieldDecl>(E.D);
+          FD && !FD->getType()->isReferenceType() &&
+          isRecordWithAttr<OwnerAttr>(FD->getType()) &&
+          LK != LK_MemInitializer) {
+        return Report;
+      }
+      return Abandon;
+    }
+  }
+
   // Note: A LifetimeBoundCall can appear interleaved in this sequence.
   // For example:
   //    const std::string& Ref(const std::string& a [[clang::lifetimebound]]);
@@ -1297,7 +1313,7 @@ checkExprLifetimeImpl(Sema &SemaRef, const InitializedEntity *InitEntity,
     auto *MTE = dyn_cast<MaterializeTemporaryExpr>(L);
 
     bool IsGslPtrValueFromGslTempOwner = true;
-    switch (analyzePathForGSLPointer(Path, L)) {
+    switch (analyzePathForGSLPointer(Path, L, LK)) {
     case Abandon:
       return false;
     case Skip:
@@ -1429,6 +1445,7 @@ checkExprLifetimeImpl(Sema &SemaRef, const InitializedEntity *InitEntity,
         auto *DRE = dyn_cast<DeclRefExpr>(L);
         // Suppress false positives for code like the one below:
         //   Ctor(unique_ptr<T> up) : pointer(up.get()), owner(move(up)) {}
+        // FIXME: move this logic to analyzePathForGSLPointer.
         if (DRE && isRecordWithAttr<OwnerAttr>(DRE->getType()))
           return false;
 
@@ -1527,6 +1544,7 @@ checkExprLifetimeImpl(Sema &SemaRef, const InitializedEntity *InitEntity,
 
       case IndirectLocalPathEntry::LifetimeBoundCall:
       case IndirectLocalPathEntry::TemporaryCopy:
+      case IndirectLocalPathEntry::MemberExpr:
       case IndirectLocalPathEntry::GslPointerInit:
       case IndirectLocalPathEntry::GslReferenceInit:
       case IndirectLocalPathEntry::GslPointerAssignment:
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index c41164a2f1af1..e440f60526bb3 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3634,6 +3634,7 @@ static bool isValidOrderingForOp(int64_t Ordering, AtomicExpr::AtomicOp Op) {
   case AtomicExpr::AO__atomic_store_n:
   case AtomicExpr::AO__scoped_atomic_store:
   case AtomicExpr::AO__scoped_atomic_store_n:
+  case AtomicExpr::AO__atomic_clear:
     return OrderingCABI != llvm::AtomicOrderingCABI::consume &&
            OrderingCABI != llvm::AtomicOrderingCABI::acquire &&
            OrderingCABI != llvm::AtomicOrderingCABI::acq_rel;
@@ -3686,12 +3687,18 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
     C11CmpXchg,
 
     // bool __atomic_compare_exchange(A *, C *, CP, bool, int, int)
-    GNUCmpXchg
+    GNUCmpXchg,
+
+    // bool __atomic_test_and_set(A *, int)
+    TestAndSetByte,
+
+    // void __atomic_clear(A *, int)
+    ClearByte,
   } Form = Init;
 
-  const unsigned NumForm = GNUCmpXchg + 1;
-  const unsigned NumArgs[] = { 2, 2, 3, 3, 3, 3, 4, 5, 6 };
-  const unsigned NumVals[] = { 1, 0, 1, 1, 1, 1, 2, 2, 3 };
+  const unsigned NumForm = ClearByte + 1;
+  const unsigned NumArgs[] = {2, 2, 3, 3, 3, 3, 4, 5, 6, 2, 2};
+  const unsigned NumVals[] = {1, 0, 1, 1, 1, 1, 2, 2, 3, 0, 0};
   // where:
   //   C is an appropriate type,
   //   A is volatile _Atomic(C) for __c11 builtins and is C for GNU builtins,
@@ -3852,6 +3859,14 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
   case AtomicExpr::AO__scoped_atomic_compare_exchange_n:
     Form = GNUCmpXchg;
     break;
+
+  case AtomicExpr::AO__atomic_test_and_set:
+    Form = TestAndSetByte;
+    break;
+
+  case AtomicExpr::AO__atomic_clear:
+    Form = ClearByte;
+    break;
   }
 
   unsigned AdjustedNumArgs = NumArgs[Form];
@@ -3911,14 +3926,28 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
     }
   }
 
-  // Pointer to object of size zero is not allowed.
-  if (RequireCompleteType(Ptr->getBeginLoc(), AtomTy,
-                          diag::err_incomplete_type))
-    return ExprError();
-  if (Context.getTypeInfoInChars(AtomTy).Width.isZero()) {
-    Diag(ExprRange.getBegin(), diag::err_atomic_builtin_must_be_pointer)
-        << Ptr->getType() << 1 << Ptr->getSourceRange();
-    return ExprError();
+  if (Form != TestAndSetByte && Form != ClearByte) {
+    // Pointer to object of size zero is not allowed.
+    if (RequireCompleteType(Ptr->getBeginLoc(), AtomTy,
+                            diag::err_incomplete_type))
+      return ExprError();
+
+    if (Context.getTypeInfoInChars(AtomTy).Width.isZero()) {
+      Diag(ExprRange.getBegin(), diag::err_atomic_builtin_must_be_pointer)
+          << Ptr->getType() << 1 << Ptr->getSourceRange();
+      return ExprError();
+    }
+  } else {
+    // The __atomic_clear and __atomic_test_and_set intrinsics accept any
+    // non-const pointer type, including void* and pointers to incomplete
+    // structs, but only access the first byte.
+    AtomTy = Context.CharTy;
+    AtomTy = AtomTy.withCVRQualifiers(
+        pointerType->getPointeeType().getCVRQualifiers());
+    QualType PointerQT = Context.getPointerType(AtomTy);
+    pointerType = PointerQT->getAs<PointerType>();
+    Ptr = ImpCastExprToType(Ptr, PointerQT, CK_BitCast).get();
+    ValType = AtomTy;
   }
 
   // For an arithmetic operation, the implied arithmetic must be well-formed.
@@ -3997,10 +4026,10 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
   ValType.removeLocalVolatile();
   ValType.removeLocalConst();
   QualType ResultType = ValType;
-  if (Form == Copy || Form == LoadCopy || Form == GNUXchg ||
-      Form == Init)
+  if (Form == Copy || Form == LoadCopy || Form == GNUXchg || Form == Init ||
+      Form == ClearByte)
     ResultType = Context.VoidTy;
-  else if (Form == C11CmpXchg || Form == GNUCmpXchg)
+  else if (Form == C11CmpXchg || Form == GNUCmpXchg || Form == TestAndSetByte)
     ResultType = Context.BoolTy;
 
   // The type of a parameter passed 'by value'. In the GNU atomics, such
@@ -4045,6 +4074,10 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
       APIOrderedArgs.push_back(Args[1]); // Order
       APIOrderedArgs.push_back(Args[3]); // OrderFail
       break;
+    case TestAndSetByte:
+    case ClearByte:
+      APIOrderedArgs.push_back(Args[1]); // Order
+      break;
     }
   } else
     APIOrderedArgs.append(Args.begin(), Args.end());
@@ -4130,6 +4163,8 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
     SubExprs.push_back(APIOrderedArgs[1]); // Val1
     break;
   case Load:
+  case TestAndSetByte:
+  case ClearByte:
     SubExprs.push_back(APIOrderedArgs[1]); // Order
     break;
   case LoadCopy:
diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp
index 8a848df70cc5a..69cda6e68bd36 100644
--- a/clang/lib/Sema/SemaCodeComplete.cpp
+++ b/clang/lib/Sema/SemaCodeComplete.cpp
@@ -34,6 +34,7 @@
 #include "clang/Sema/CodeCompleteConsumer.h"
 #include "clang/Sema/DeclSpec.h"
 #include "clang/Sema/Designator.h"
+#include "clang/Sema/HeuristicResolver.h"
 #include "clang/Sema/Lookup.h"
 #include "clang/Sema/Overload.h"
 #include "clang/Sema/ParsedAttr.h"
@@ -5861,8 +5862,10 @@ void SemaCodeCompletion::CodeCompleteMemberReferenceExpr(
   enum CodeCompletionContext::Kind contextKind;
 
   if (IsArrow) {
-    if (const auto *Ptr = ConvertedBaseType->getAs<PointerType>())
-      ConvertedBaseType = Ptr->getPointeeType();
+    if (QualType PointeeType = Resolver.getPointeeType(ConvertedBaseType);
+        !PointeeType.isNull()) {
+      ConvertedBaseType = PointeeType;
+    }
   }
 
   if (IsArrow) {
@@ -5899,8 +5902,9 @@ void SemaCodeCompletion::CodeCompleteMemberReferenceExpr(
     ExprValueKind BaseKind = Base->getValueKind();
 
     if (IsArrow) {
-      if (const PointerType *Ptr = BaseType->getAs<PointerType>()) {
-        BaseType = Ptr->getPointeeType();
+      if (QualType PointeeType = Resolver.getPointeeType(BaseType);
+          !PointeeType.isNull()) {
+        BaseType = PointeeType;
         BaseKind = VK_LValue;
       } else if (BaseType->isObjCObjectPointerType() ||
                  BaseType->isTemplateTypeParmType()) {
@@ -10472,4 +10476,5 @@ void SemaCodeCompletion::GatherGlobalCodeCompletions(
 
 SemaCodeCompletion::SemaCodeCompletion(Sema &S,
                                        CodeCompleteConsumer *CompletionConsumer)
-    : SemaBase(S), CodeCompleter(CompletionConsumer) {}
+    : SemaBase(S), CodeCompleter(CompletionConsumer),
+      Resolver(S.getASTContext()) {}
diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp
index 6a40a59c977d7..8a77cbf8c9477 100644
--- a/clang/lib/Sema/SemaConcept.cpp
+++ b/clang/lib/Sema/SemaConcept.cpp
@@ -1027,6 +1027,9 @@ static const Expr *SubstituteConstraintExpressionWithoutSatisfaction(
     ContextScope.emplace(S, const_cast<DeclContext *>(cast<DeclContext>(RD)),
                          /*NewThisContext=*/false);
   }
+  EnterExpressionEvaluationContext UnevaluatedContext(
+      S, Sema::ExpressionEvaluationContext::Unevaluated,
+      Sema::ReuseLambdaContextDecl);
   ExprResult SubstConstr = S.SubstConstraintExprWithoutSatisfaction(
       const_cast<clang::Expr *>(ConstrExpr), MLTAL);
   if (SFINAE.hasErrorOccurred() || !SubstConstr.isUsable())
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index a867ed73bd403..839b3a1cccdcc 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -2655,6 +2655,15 @@ CXXBaseSpecifier *Sema::CheckBaseSpecifier(CXXRecordDecl *Class,
       return nullptr;
     }
 
+    if (BaseType.hasQualifiers()) {
+      std::string Quals =
+          BaseType.getQualifiers().getAsString(Context.getPrintingPolicy());
+      Diag(BaseLoc, diag::warn_qual_base_type)
+          << Quals << std::count(Quals.begin(), Quals.end(), ' ') + 1
+          << BaseType;
+      Diag(BaseLoc, diag::note_base_class_specified_here) << BaseType;
+    }
+
     // For the MS ABI, propagate DLL attributes to base class templates.
     if (Context.getTargetInfo().getCXXABI().isMicrosoft() ||
         Context.getTargetInfo().getTriple().isPS()) {
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index ae40895980d90..d5273d463d7c0 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -4630,8 +4630,9 @@ ExprResult Sema::CreateUnaryExprOrTypeTraitExpr(TypeSourceInfo *TInfo,
 
   // Adds overload of TransformToPotentiallyEvaluated for TypeSourceInfo to
   // properly deal with VLAs in nested calls of sizeof and typeof.
-  if (isUnevaluatedContext() && ExprKind == UETT_SizeOf &&
-      TInfo->getType()->isVariablyModifiedType())
+  if (currentEvaluationContext().isUnevaluated() &&
+      currentEvaluationContext().InConditionallyConstantEvaluateContext &&
+      ExprKind == UETT_SizeOf && TInfo->getType()->isVariablyModifiedType())
     TInfo = TransformToPotentiallyEvaluated(TInfo);
 
   // C99 6.5.3.4p4: the type (an unsigned integer type) is size_t.
@@ -8387,6 +8388,11 @@ OpenCLCheckVectorConditional(Sema &S, ExprResult &Cond,
 
 /// Return true if the Expr is block type
 static bool checkBlockType(Sema &S, const Expr *E) {
+  if (E->getType()->isBlockPointerType()) {
+    S.Diag(E->getExprLoc(), diag::err_opencl_ternary_with_block);
+    return true;
+  }
+
   if (const CallExpr *CE = dyn_cast<CallExpr>(E)) {
     QualType Ty = CE->getCallee()->getType();
     if (Ty->isBlockPointerType()) {
diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp
index 9d8cdc9c08525..e18e3c197383e 100644
--- a/clang/lib/Sema/SemaLookup.cpp
+++ b/clang/lib/Sema/SemaLookup.cpp
@@ -1614,7 +1614,7 @@ bool Sema::isUsableModule(const Module *M) {
 
   // Otherwise, the global module fragment from other translation unit is not
   // directly usable.
-  if (M->isGlobalModule())
+  if (M->isExplicitGlobalModule())
     return false;
 
   Module *Current = getCurrentModule();
@@ -1628,6 +1628,8 @@ bool Sema::isUsableModule(const Module *M) {
   // module should be visible to the decls in the implicit global module.
   if (Current->isImplicitGlobalModule())
     Current = Current->getTopLevelModule();
+  if (M->isImplicitGlobalModule())
+    M = M->getTopLevelModule();
 
   // If M is the module we're parsing or M and the current module unit lives in
   // the same module, M should be usable.
diff --git a/clang/lib/Sema/SemaOpenACCClause.cpp b/clang/lib/Sema/SemaOpenACCClause.cpp
index 27da14de4c04f..000934225402a 100644
--- a/clang/lib/Sema/SemaOpenACCClause.cpp
+++ b/clang/lib/Sema/SemaOpenACCClause.cpp
@@ -1360,7 +1360,6 @@ ExprResult CheckGangKernelsExpr(SemaOpenACC &S,
   }
   case OpenACCGangKind::Static:
     return CheckGangStaticExpr(S, E);
-    return ExprError();
   }
   llvm_unreachable("Unknown gang kind in gang kernels check");
 }
diff --git a/clang/lib/Sema/SemaX86.cpp b/clang/lib/Sema/SemaX86.cpp
index fd1a6017712d2..7feca138e3e2c 100644
--- a/clang/lib/Sema/SemaX86.cpp
+++ b/clang/lib/Sema/SemaX86.cpp
@@ -1045,9 +1045,9 @@ bool SemaX86::CheckBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
   case X86::BI__builtin_ia32_vpshrdw128:
   case X86::BI__builtin_ia32_vpshrdw256:
   case X86::BI__builtin_ia32_vpshrdw512:
-  case X86::BI__builtin_ia32_vminmaxnepbf16128:
-  case X86::BI__builtin_ia32_vminmaxnepbf16256:
-  case X86::BI__builtin_ia32_vminmaxnepbf16512:
+  case X86::BI__builtin_ia32_vminmaxbf16128:
+  case X86::BI__builtin_ia32_vminmaxbf16256:
+  case X86::BI__builtin_ia32_vminmaxbf16512:
   case X86::BI__builtin_ia32_vminmaxpd128_mask:
   case X86::BI__builtin_ia32_vminmaxpd256_round_mask:
   case X86::BI__builtin_ia32_vminmaxph128_mask:
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index c7c17e09a30e0..066c4b1533552 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -7198,6 +7198,8 @@ void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) {
 
   bool ModulesCodegen =
       !D->isDependentType() &&
+      D->getTemplateSpecializationKind() !=
+          TSK_ExplicitInstantiationDeclaration &&
       (Writer->getLangOpts().ModulesDebugInfo || D->isInNamedModule());
   Record->push_back(ModulesCodegen);
   if (ModulesCodegen)
diff --git a/clang/test/AST/ByteCode/cxx2a.cpp b/clang/test/AST/ByteCode/cxx2a.cpp
index f6006881cee4d..e478a0ddc4c14 100644
--- a/clang/test/AST/ByteCode/cxx2a.cpp
+++ b/clang/test/AST/ByteCode/cxx2a.cpp
@@ -139,8 +139,8 @@ namespace TypeId {
   static_assert(&B2().ti1 == &typeid(B));
   static_assert(&B2().ti2 == &typeid(B2));
   extern B2 extern_b2;
-  static_assert(&typeid(extern_b2) == &typeid(B2)); // both-error {{constant expression}} \
-                                                    // both-note{{typeid applied to object 'extern_b2' whose dynamic type is not constant}}
+  static_assert(&typeid(extern_b2) == &typeid(B2)); // expected-error {{constant expression}} \
+                                                    // expected-note{{typeid applied to object 'extern_b2' whose dynamic type is not constant}}
 
 
   constexpr B2 b2;
diff --git a/clang/test/CodeCompletion/member-access.cpp b/clang/test/CodeCompletion/member-access.cpp
index 912f269db6c1a..ab6dc69bf2923 100644
--- a/clang/test/CodeCompletion/member-access.cpp
+++ b/clang/test/CodeCompletion/member-access.cpp
@@ -384,3 +384,20 @@ void Foo() {
 // RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:382:5 %s -o - | FileCheck -check-prefix=CHECK-DEREF-DEPENDENT %s
 // CHECK-DEREF-DEPENDENT: [#void#]Add()
 }
+
+namespace dependent_smart_pointer {
+template <typename T>
+struct smart_pointer {
+  T* operator->(); // overloaded arrow that member completion has to look through
+};
+
+template <typename T>
+struct node {
+  smart_pointer<node<T>> next; // member whose type depends on T
+  void foo() {
+    next->next; // completion is requested at column 11, right after "next->"
+    // RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:398:11 %s -o - | FileCheck -check-prefix=CHECK-DEPENDENT-SMARTPTR %s
+    // CHECK-DEPENDENT-SMARTPTR: [#smart_pointer<node<T>>#]next
+  }
+};
+} // namespace dependent_smart_pointer
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_dupq.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_dupq.c
index b1f404c0ec8c0..cf1c00cdd56b2 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_dupq.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_dupq.c
@@ -211,3 +211,19 @@ svfloat64_t test_svdup_laneq_f64(svfloat64_t zn) {
 svbfloat16_t test_svdup_laneq_bf16(svbfloat16_t zn) {
     return SVE_ACLE_FUNC(svdup_laneq, _bf16)(zn, 3);
 }
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svdup_laneq_mf8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.laneq.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 1)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z20test_svdup_laneq_mf8u13__SVMfloat8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.laneq.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 1)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svdup_laneq_mf8(svmfloat8_t zn) {
+    return SVE_ACLE_FUNC(svdup_laneq, _mf8)(zn, 1); // expected IR: @llvm.aarch64.sve.dup.laneq.nxv16i8 with lane index i32 1
+}
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_extq.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_extq.c
index 06eec1e00900c..d46e67b9918aa 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_extq.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_extq.c
@@ -211,3 +211,19 @@ svfloat64_t test_svextq_f64(svfloat64_t zn, svfloat64_t zm) {
 svbfloat16_t test_svextq_bf16(svbfloat16_t zn, svbfloat16_t zm) {
     return SVE_ACLE_FUNC(svextq, _bf16,,)(zn, zm, 6);
 }
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svextq_mf8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 6)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svextq_mf8u13__SVMfloat8_tS_
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 6)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svextq_mf8(svmfloat8_t zn, svmfloat8_t zm) {
+    return SVE_ACLE_FUNC(svextq, _mf8,,)(zn, zm, 6); // expected IR: @llvm.aarch64.sve.extq.nxv16i8 with immediate i32 6
+}
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tblq.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tblq.c
index 7a19cde9abd87..3d3bb0d17a50c 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tblq.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tblq.c
@@ -212,3 +212,19 @@ svfloat64_t test_svtblq_f64(svfloat64_t zn, svuint64_t zm) {
 svbfloat16_t test_svtblq_bf16(svbfloat16_t zn, svuint16_t zm) {
     return SVE_ACLE_FUNC(svtblq, _bf16,,)(zn, zm);
 }
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svtblq_mf8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tblq.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svtblq_mf8u13__SVMfloat8_tu11__SVUint8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tblq.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svtblq_mf8(svmfloat8_t zn, svuint8_t zm) {
+    return SVE_ACLE_FUNC(svtblq, _mf8,,)(zn, zm); // expected IR: @llvm.aarch64.sve.tblq.nxv16i8 on (zn, zm)
+}
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tbxq.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tbxq.c
index f4aaed586c731..674bd9cbb083b 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tbxq.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tbxq.c
@@ -212,3 +212,19 @@ svfloat64_t test_svtbxq_f64(svfloat64_t passthru, svfloat64_t zn, svuint64_t zm)
 svbfloat16_t test_svtbxq_bf16(svbfloat16_t passthru, svbfloat16_t zn, svuint16_t zm) {
     return SVE_ACLE_FUNC(svtbxq, _bf16,,)(passthru, zn, zm);
 }
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svtbxq_mf8
+// CHECK-SAME: (<vscale x 16 x i8> [[PASSTHRU:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tbxq.nxv16i8(<vscale x 16 x i8> [[PASSTHRU]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svtbxq_mf8u13__SVMfloat8_tS_u11__SVUint8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[PASSTHRU:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tbxq.nxv16i8(<vscale x 16 x i8> [[PASSTHRU]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svtbxq_mf8(svmfloat8_t passthru, svmfloat8_t zn, svuint8_t zm) {
+    return SVE_ACLE_FUNC(svtbxq, _mf8,,)(passthru, zn, zm); // expected IR: @llvm.aarch64.sve.tbxq.nxv16i8 on (passthru, zn, zm)
+}
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq1.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq1.c
index 8c639120409ec..35878d61f9549 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq1.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq1.c
@@ -214,4 +214,19 @@ svbfloat16_t test_svuzpq1_bf16(svbfloat16_t zn, svbfloat16_t zm) {
     return SVE_ACLE_FUNC(svuzpq1,_bf16)(zn, zm);
 }
 
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svuzpq1_mf8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z16test_svuzpq1_mf8u13__SVMfloat8_tS_
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svuzpq1_mf8(svmfloat8_t zn, svmfloat8_t zm) {
+    return SVE_ACLE_FUNC(svuzpq1,_mf8)(zn, zm); // expected IR: @llvm.aarch64.sve.uzpq1.nxv16i8 on (zn, zm)
+}
 
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq2.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq2.c
index 756d2538317e0..a22f20f30039f 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq2.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq2.c
@@ -214,4 +214,18 @@ svbfloat16_t test_svuzpq2_bf16(svbfloat16_t zn, svbfloat16_t zm) {
     return SVE_ACLE_FUNC(svuzpq2,_bf16)(zn, zm);
 }
 
-
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svuzpq2_mf8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z16test_svuzpq2_mf8u13__SVMfloat8_tS_
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svuzpq2_mf8(svmfloat8_t zn, svmfloat8_t zm) {
+    return SVE_ACLE_FUNC(svuzpq2,_mf8)(zn, zm); // expected IR: @llvm.aarch64.sve.uzpq2.nxv16i8 on (zn, zm)
+}
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq1.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq1.c
index 6684bb2cf2d99..4366973062697 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq1.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq1.c
@@ -214,4 +214,18 @@ svbfloat16_t test_svzipq1_bf16(svbfloat16_t zn, svbfloat16_t zm) {
     return SVE_ACLE_FUNC(svzipq1,_bf16)(zn, zm);
 }
 
-
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svzipq1_mf8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z16test_svzipq1_mf8u13__SVMfloat8_tS_
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svzipq1_mf8(svmfloat8_t zn, svmfloat8_t zm) {
+    return SVE_ACLE_FUNC(svzipq1,_mf8)(zn, zm); // expected IR: @llvm.aarch64.sve.zipq1.nxv16i8 on (zn, zm)
+}
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq2.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq2.c
index 2bfd72b32d1cc..4e27ec463c08e 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq2.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq2.c
@@ -214,4 +214,18 @@ svbfloat16_t test_svzipq2_bf16(svbfloat16_t zn, svbfloat16_t zm) {
     return SVE_ACLE_FUNC(svzipq2,_bf16)(zn, zm);
 }
 
-
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svzipq2_mf8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z16test_svzipq2_mf8u13__SVMfloat8_tS_
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svzipq2_mf8(svmfloat8_t zn, svmfloat8_t zm) {
+    return SVE_ACLE_FUNC(svzipq2,_mf8)(zn, zm); // expected IR: @llvm.aarch64.sve.zipq2.nxv16i8 on (zn, zm)
+}
diff --git a/clang/test/CodeGen/AArch64/targetattr.c b/clang/test/CodeGen/AArch64/targetattr.c
index f8d5f9912c0d7..cfe115bf97ed3 100644
--- a/clang/test/CodeGen/AArch64/targetattr.c
+++ b/clang/test/CodeGen/AArch64/targetattr.c
@@ -218,7 +218,7 @@ void applem4() {}
 // CHECK: attributes #[[ATTR15]] = { noinline nounwind optnone "branch-target-enforcement" "guarded-control-stack" "no-trapping-math"="true" "sign-return-address"="non-leaf" "sign-return-address-key"="a_key" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-n1" "target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" }
 // CHECK: attributes #[[ATTR16]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
 // CHECK: attributes #[[ATTR17]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-v9.3a" }
-// CHECK: attributes #[[ATTR18]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m4" "target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fpac,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+sme,+sme-f64f64,+sme-i16i64,+sme2,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8.7a,+v8a,+wfxt" }
+// CHECK: attributes #[[ATTR18]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m4" "target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fpac,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+sme,+sme-f64f64,+sme-i16i64,+sme2,+spe-eef,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8.7a,+v8a,+wfxt" }
 //.
 // CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 // CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
diff --git a/clang/test/CodeGen/X86/avx10_2_512minmax-builtins.c b/clang/test/CodeGen/X86/avx10_2_512minmax-builtins.c
index 4e80d8b36e194..4e467b36b2348 100644
--- a/clang/test/CodeGen/X86/avx10_2_512minmax-builtins.c
+++ b/clang/test/CodeGen/X86/avx10_2_512minmax-builtins.c
@@ -5,25 +5,25 @@
 
 #include <immintrin.h>
 
-__m512bh test_mm512_minmaxne_pbh(__m512bh __A, __m512bh __B) {
-  // CHECK-LABEL: @test_mm512_minmaxne_pbh(
-  // CHECK: call <32 x bfloat> @llvm.x86.avx10.vminmaxnepbf16512(
-  return _mm512_minmaxne_pbh(__A, __B, 127);
+__m512bh test_mm512_minmax_pbh(__m512bh __A, __m512bh __B) {
+  // CHECK-LABEL: @test_mm512_minmax_pbh(
+  // CHECK: call <32 x bfloat> @llvm.x86.avx10.vminmaxbf16512(
+  return _mm512_minmax_pbh(__A, __B, 127);
 }
 
-__m512bh test_mm512_mask_minmaxne_pbh(__m512bh __A, __mmask32 __B, __m512bh __C, __m512bh __D) {
-  // CHECK-LABEL: @test_mm512_mask_minmaxne_pbh(
-  // CHECK: call <32 x bfloat> @llvm.x86.avx10.vminmaxnepbf16512(
+__m512bh test_mm512_mask_minmax_pbh(__m512bh __A, __mmask32 __B, __m512bh __C, __m512bh __D) {
+  // CHECK-LABEL: @test_mm512_mask_minmax_pbh(
+  // CHECK: call <32 x bfloat> @llvm.x86.avx10.vminmaxbf16512(
   // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
-  return _mm512_mask_minmaxne_pbh(__A, __B, __C, __D, 127);
+  return _mm512_mask_minmax_pbh(__A, __B, __C, __D, 127);
 }
 
-__m512bh test_mm512_maskz_minmaxne_pbh(__mmask32 __A, __m512bh __B, __m512bh __C) {
-  // CHECK-LABEL: @test_mm512_maskz_minmaxne_pbh(
-  // CHECK: call <32 x bfloat> @llvm.x86.avx10.vminmaxnepbf16512(
+__m512bh test_mm512_maskz_minmax_pbh(__mmask32 __A, __m512bh __B, __m512bh __C) {
+  // CHECK-LABEL: @test_mm512_maskz_minmax_pbh(
+  // CHECK: call <32 x bfloat> @llvm.x86.avx10.vminmaxbf16512(
   // CHECK: zeroinitializer
   // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
-  return _mm512_maskz_minmaxne_pbh(__A, __B, __C, 127);
+  return _mm512_maskz_minmax_pbh(__A, __B, __C, 127);
 }
 
 __m512d test_mm512_minmax_pd(__m512d __A, __m512d __B) {
diff --git a/clang/test/CodeGen/X86/avx10_2_512minmax-error.c b/clang/test/CodeGen/X86/avx10_2_512minmax-error.c
index e487c3fad49dd..6db7801eb0040 100644
--- a/clang/test/CodeGen/X86/avx10_2_512minmax-error.c
+++ b/clang/test/CodeGen/X86/avx10_2_512minmax-error.c
@@ -5,20 +5,20 @@
 
 #include <immintrin.h>
 
-__m128bh test_mm_minmaxne_pbh(__m128bh __A, __m128bh __B) {
-  return _mm_minmaxne_pbh(__A, __B, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+__m128bh test_mm_minmax_pbh(__m128bh __A, __m128bh __B) {
+  return _mm_minmax_pbh(__A, __B, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
 }
 
-__m128bh test_mm_mask_minmaxne_pbh(__m128bh __A, __mmask8 __B, __m128bh __C, __m128bh __D) {
-  return _mm_mask_minmaxne_pbh(__A, __B, __C, __D, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+__m128bh test_mm_mask_minmax_pbh(__m128bh __A, __mmask8 __B, __m128bh __C, __m128bh __D) {
+  return _mm_mask_minmax_pbh(__A, __B, __C, __D, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
 }
 
-__m256bh test_mm256_minmaxne_pbh(__m256bh __A, __m256bh __B) {
-  return _mm256_minmaxne_pbh(__A, __B, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+__m256bh test_mm256_minmax_pbh(__m256bh __A, __m256bh __B) {
+  return _mm256_minmax_pbh(__A, __B, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
 }
 
-__m256bh test_mm256_mask_minmaxne_pbh(__m256bh __A, __mmask16 __B, __m256bh __C, __m256bh __D) {
-  return _mm256_mask_minmaxne_pbh(__A, __B, __C, __D, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+__m256bh test_mm256_mask_minmax_pbh(__m256bh __A, __mmask16 __B, __m256bh __C, __m256bh __D) {
+  return _mm256_mask_minmax_pbh(__A, __B, __C, __D, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
 }
 
 __m128d test_mm_minmax_pd(__m128d __A, __m128d __B) {
@@ -69,12 +69,12 @@ __m256 test_mm256_mask_minmax_ps(__m256 __A, __mmask8 __B, __m256 __C, __m256 __
   return _mm256_mask_minmax_ps(__A, __B, __C, __D, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
 }
 
-__m512bh test_mm512_minmaxne_pbh(__m512bh __A, __m512bh __B) {
-  return _mm512_minmaxne_pbh(__A, __B, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+__m512bh test_mm512_minmax_pbh(__m512bh __A, __m512bh __B) {
+  return _mm512_minmax_pbh(__A, __B, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
 }
 
-__m512bh test_mm512_mask_minmaxne_pbh(__m512bh __A, __mmask32 __B, __m512bh __C, __m512bh __D) {
-  return _mm512_mask_minmaxne_pbh(__A, __B, __C, __D, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+__m512bh test_mm512_mask_minmax_pbh(__m512bh __A, __mmask32 __B, __m512bh __C, __m512bh __D) {
+  return _mm512_mask_minmax_pbh(__A, __B, __C, __D, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
 }
 
 __m512d test_mm512_minmax_pd(__m512d __A, __m512d __B) {
diff --git a/clang/test/CodeGen/X86/avx10_2minmax-builtins.c b/clang/test/CodeGen/X86/avx10_2minmax-builtins.c
index 1efafe24ab125..7e21858c71834 100644
--- a/clang/test/CodeGen/X86/avx10_2minmax-builtins.c
+++ b/clang/test/CodeGen/X86/avx10_2minmax-builtins.c
@@ -5,46 +5,46 @@
 
 #include <immintrin.h>
 
-__m128bh test_mm_minmaxne_pbh(__m128bh __A, __m128bh __B) {
-  // CHECK-LABEL: @test_mm_minmaxne_pbh(
-  // CHECK: call <8 x bfloat> @llvm.x86.avx10.vminmaxnepbf16128(
-  return _mm_minmaxne_pbh(__A, __B, 127);
+__m128bh test_mm_minmax_pbh(__m128bh __A, __m128bh __B) {
+  // CHECK-LABEL: @test_mm_minmax_pbh(
+  // CHECK: call <8 x bfloat> @llvm.x86.avx10.vminmaxbf16128(
+  return _mm_minmax_pbh(__A, __B, 127);
 }
 
-__m128bh test_mm_mask_minmaxne_pbh(__m128bh __A, __mmask8 __B, __m128bh __C, __m128bh __D) {
-  // CHECK-LABEL: @test_mm_mask_minmaxne_pbh(
-  // CHECK: call <8 x bfloat> @llvm.x86.avx10.vminmaxnepbf16128(
+__m128bh test_mm_mask_minmax_pbh(__m128bh __A, __mmask8 __B, __m128bh __C, __m128bh __D) {
+  // CHECK-LABEL: @test_mm_mask_minmax_pbh(
+  // CHECK: call <8 x bfloat> @llvm.x86.avx10.vminmaxbf16128(
   // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
-  return _mm_mask_minmaxne_pbh(__A, __B, __C, __D, 127);
+  return _mm_mask_minmax_pbh(__A, __B, __C, __D, 127);
 }
 
-__m128bh test_mm_maskz_minmaxne_pbh(__mmask8 __A, __m128bh __B, __m128bh __C) {
-  // CHECK-LABEL: @test_mm_maskz_minmaxne_pbh(
-  // CHECK: call <8 x bfloat> @llvm.x86.avx10.vminmaxnepbf16128(
+__m128bh test_mm_maskz_minmax_pbh(__mmask8 __A, __m128bh __B, __m128bh __C) {
+  // CHECK-LABEL: @test_mm_maskz_minmax_pbh(
+  // CHECK: call <8 x bfloat> @llvm.x86.avx10.vminmaxbf16128(
   // CHECK: zeroinitializer
   // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
-  return _mm_maskz_minmaxne_pbh(__A, __B, __C, 127);
+  return _mm_maskz_minmax_pbh(__A, __B, __C, 127);
 }
 
-__m256bh test_mm256_minmaxne_pbh(__m256bh __A, __m256bh __B) {
-  // CHECK-LABEL: @test_mm256_minmaxne_pbh(
-  // CHECK: call <16 x bfloat> @llvm.x86.avx10.vminmaxnepbf16256(
-  return _mm256_minmaxne_pbh(__A, __B, 127);
+__m256bh test_mm256_minmax_pbh(__m256bh __A, __m256bh __B) {
+  // CHECK-LABEL: @test_mm256_minmax_pbh(
+  // CHECK: call <16 x bfloat> @llvm.x86.avx10.vminmaxbf16256(
+  return _mm256_minmax_pbh(__A, __B, 127);
 }
 
-__m256bh test_mm256_mask_minmaxne_pbh(__m256bh __A, __mmask16 __B, __m256bh __C, __m256bh __D) {
-  // CHECK-LABEL: @test_mm256_mask_minmaxne_pbh(
-  // CHECK: call <16 x bfloat> @llvm.x86.avx10.vminmaxnepbf16256(
+__m256bh test_mm256_mask_minmax_pbh(__m256bh __A, __mmask16 __B, __m256bh __C, __m256bh __D) {
+  // CHECK-LABEL: @test_mm256_mask_minmax_pbh(
+  // CHECK: call <16 x bfloat> @llvm.x86.avx10.vminmaxbf16256(
   // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
-  return _mm256_mask_minmaxne_pbh(__A, __B, __C, __D, 127);
+  return _mm256_mask_minmax_pbh(__A, __B, __C, __D, 127);
 }
 
-__m256bh test_mm256_maskz_minmaxne_pbh(__mmask16 __A, __m256bh __B, __m256bh __C) {
-  // CHECK-LABEL: @test_mm256_maskz_minmaxne_pbh(
-  // CHECK: call <16 x bfloat> @llvm.x86.avx10.vminmaxnepbf16256(
+__m256bh test_mm256_maskz_minmax_pbh(__mmask16 __A, __m256bh __B, __m256bh __C) {
+  // CHECK-LABEL: @test_mm256_maskz_minmax_pbh(
+  // CHECK: call <16 x bfloat> @llvm.x86.avx10.vminmaxbf16256(
   // CHECK: zeroinitializer
   // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
-  return _mm256_maskz_minmaxne_pbh(__A, __B, __C, 127);
+  return _mm256_maskz_minmax_pbh(__A, __B, __C, 127);
 }
 
 __m128d test_mm_minmax_pd(__m128d __A, __m128d __B) {
diff --git a/clang/test/CodeGen/atomic-test-and-set.c b/clang/test/CodeGen/atomic-test-and-set.c
new file mode 100644
index 0000000000000..39d4cef16b21d
--- /dev/null
+++ b/clang/test/CodeGen/atomic-test-and-set.c
@@ -0,0 +1,345 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=aarch64-none-elf | FileCheck %s
+// REQUIRES: aarch64-registered-target
+
+#include <stdatomic.h>
+
+// CHECK-LABEL: define dso_local void @clear_relaxed(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    store atomic i8 0, ptr [[TMP0]] monotonic, align 1
+// CHECK-NEXT:    ret void
+//
+void clear_relaxed(char *ptr) {
+  __atomic_clear(ptr, memory_order_relaxed);
+}
+
+// CHECK-LABEL: define dso_local void @clear_seq_cst(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    store atomic i8 0, ptr [[TMP0]] seq_cst, align 1
+// CHECK-NEXT:    ret void
+//
+void clear_seq_cst(char *ptr) {
+  __atomic_clear(ptr, memory_order_seq_cst);
+}
+
+// CHECK-LABEL: define dso_local void @clear_release(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    store atomic i8 0, ptr [[TMP0]] release, align 1
+// CHECK-NEXT:    ret void
+//
+void clear_release(char *ptr) {
+  __atomic_clear(ptr, memory_order_release);
+}
+
+// CHECK-LABEL: define dso_local void @clear_dynamic(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[ORDER:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[ORDER_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    store i32 [[ORDER]], ptr [[ORDER_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ORDER_ADDR]], align 4
+// CHECK-NEXT:    switch i32 [[TMP1]], label %[[MONOTONIC:.*]] [
+// CHECK-NEXT:      i32 3, label %[[RELEASE:.*]]
+// CHECK-NEXT:      i32 5, label %[[SEQCST:.*]]
+// CHECK-NEXT:    ]
+// CHECK:       [[MONOTONIC]]:
+// CHECK-NEXT:    store atomic i8 0, ptr [[TMP0]] monotonic, align 1
+// CHECK-NEXT:    br label %[[ATOMIC_CONTINUE:.*]]
+// CHECK:       [[RELEASE]]:
+// CHECK-NEXT:    store atomic i8 0, ptr [[TMP0]] release, align 1
+// CHECK-NEXT:    br label %[[ATOMIC_CONTINUE]]
+// CHECK:       [[SEQCST]]:
+// CHECK-NEXT:    store atomic i8 0, ptr [[TMP0]] seq_cst, align 1
+// CHECK-NEXT:    br label %[[ATOMIC_CONTINUE]]
+// CHECK:       [[ATOMIC_CONTINUE]]:
+// CHECK-NEXT:    ret void
+//
+void clear_dynamic(char *ptr, int order) {
+  __atomic_clear(ptr, order);
+}
+
+// CHECK-LABEL: define dso_local void @test_and_set_relaxed(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 monotonic, align 1
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
+// CHECK-NEXT:    store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-NEXT:    ret void
+//
+void test_and_set_relaxed(char *ptr) {
+  __atomic_test_and_set(ptr, memory_order_relaxed);
+}
+
+// CHECK-LABEL: define dso_local void @test_and_set_consume(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 acquire, align 1
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
+// CHECK-NEXT:    store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-NEXT:    ret void
+//
+void test_and_set_consume(char *ptr) {
+  __atomic_test_and_set(ptr, memory_order_consume);
+}
+
+// CHECK-LABEL: define dso_local void @test_and_set_acquire(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 acquire, align 1
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
+// CHECK-NEXT:    store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-NEXT:    ret void
+//
+void test_and_set_acquire(char *ptr) {
+  __atomic_test_and_set(ptr, memory_order_acquire);
+}
+
+// CHECK-LABEL: define dso_local void @test_and_set_release(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 release, align 1
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
+// CHECK-NEXT:    store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-NEXT:    ret void
+//
+void test_and_set_release(char *ptr) {
+  __atomic_test_and_set(ptr, memory_order_release);
+}
+
+// CHECK-LABEL: define dso_local void @test_and_set_acq_rel(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 acq_rel, align 1
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
+// CHECK-NEXT:    store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-NEXT:    ret void
+//
+void test_and_set_acq_rel(char *ptr) {
+  __atomic_test_and_set(ptr, memory_order_acq_rel);
+}
+
+// CHECK-LABEL: define dso_local void @test_and_set_seq_cst(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 seq_cst, align 1
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
+// CHECK-NEXT:    store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-NEXT:    ret void
+//
+void test_and_set_seq_cst(char *ptr) {
+  __atomic_test_and_set(ptr, memory_order_seq_cst);
+}
+
+// CHECK-LABEL: define dso_local void @test_and_set_dynamic(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[ORDER:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[ORDER_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    store i32 [[ORDER]], ptr [[ORDER_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ORDER_ADDR]], align 4
+// CHECK-NEXT:    switch i32 [[TMP1]], label %[[MONOTONIC:.*]] [
+// CHECK-NEXT:      i32 1, label %[[ACQUIRE:.*]]
+// CHECK-NEXT:      i32 2, label %[[ACQUIRE]]
+// CHECK-NEXT:      i32 3, label %[[RELEASE:.*]]
+// CHECK-NEXT:      i32 4, label %[[ACQREL:.*]]
+// CHECK-NEXT:      i32 5, label %[[SEQCST:.*]]
+// CHECK-NEXT:    ]
+// CHECK:       [[MONOTONIC]]:
+// CHECK-NEXT:    [[TMP2:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 monotonic, align 1
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i8 [[TMP2]], 0
+// CHECK-NEXT:    store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT:    br label %[[ATOMIC_CONTINUE:.*]]
+// CHECK:       [[ACQUIRE]]:
+// CHECK-NEXT:    [[TMP3:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 acquire, align 1
+// CHECK-NEXT:    [[TOBOOL1:%.*]] = icmp ne i8 [[TMP3]], 0
+// CHECK-NEXT:    store i1 [[TOBOOL1]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT:    br label %[[ATOMIC_CONTINUE]]
+// CHECK:       [[RELEASE]]:
+// CHECK-NEXT:    [[TMP4:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 release, align 1
+// CHECK-NEXT:    [[TOBOOL2:%.*]] = icmp ne i8 [[TMP4]], 0
+// CHECK-NEXT:    store i1 [[TOBOOL2]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT:    br label %[[ATOMIC_CONTINUE]]
+// CHECK:       [[ACQREL]]:
+// CHECK-NEXT:    [[TMP5:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 acq_rel, align 1
+// CHECK-NEXT:    [[TOBOOL3:%.*]] = icmp ne i8 [[TMP5]], 0
+// CHECK-NEXT:    store i1 [[TOBOOL3]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT:    br label %[[ATOMIC_CONTINUE]]
+// CHECK:       [[SEQCST]]:
+// CHECK-NEXT:    [[TMP6:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 seq_cst, align 1
+// CHECK-NEXT:    [[TOBOOL4:%.*]] = icmp ne i8 [[TMP6]], 0
+// CHECK-NEXT:    store i1 [[TOBOOL4]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT:    br label %[[ATOMIC_CONTINUE]]
+// CHECK:       [[ATOMIC_CONTINUE]]:
+// CHECK-NEXT:    [[TMP7:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP7]] to i1
+// CHECK-NEXT:    ret void
+//
+void test_and_set_dynamic(char *ptr, int order) {
+  __atomic_test_and_set(ptr, order);
+}
+
+// CHECK-LABEL: define dso_local void @test_and_set_array(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[X:%.*]] = alloca [10 x i32], align 4
+// CHECK-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x i32], ptr [[X]], i64 0, i64 0
+// CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw volatile xchg ptr [[ARRAYDECAY]], i8 1 seq_cst, align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i8 [[TMP0]], 0
+// CHECK-NEXT:    store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP1]] to i1
+// CHECK-NEXT:    ret void
+//
+void test_and_set_array() {
+  volatile int x[10];
+  __atomic_test_and_set(x, memory_order_seq_cst);
+}
+
+// These builtins accept a pointer to any type, including void and incomplete
+// structs, and always access only the first byte, regardless of the pointee
+// type's size.
+
+struct incomplete;
+
+// CHECK-LABEL: define dso_local void @clear_int(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    store atomic i8 0, ptr [[TMP0]] monotonic, align 4
+// CHECK-NEXT:    ret void
+//
+void clear_int(int *ptr) {
+  __atomic_clear(ptr, memory_order_relaxed);
+}
+// CHECK-LABEL: define dso_local void @clear_void(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    store atomic i8 0, ptr [[TMP0]] monotonic, align 1
+// CHECK-NEXT:    ret void
+//
+void clear_void(void *ptr) {
+  __atomic_clear(ptr, memory_order_relaxed);
+}
+// CHECK-LABEL: define dso_local void @clear_incomplete(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    store atomic i8 0, ptr [[TMP0]] monotonic, align 1
+// CHECK-NEXT:    ret void
+//
+void clear_incomplete(struct incomplete *ptr) {
+  __atomic_clear(ptr, memory_order_relaxed);
+}
+
+// CHECK-LABEL: define dso_local void @test_and_set_int(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 monotonic, align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
+// CHECK-NEXT:    store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-NEXT:    ret void
+//
+void test_and_set_int(int *ptr) {
+  __atomic_test_and_set(ptr, memory_order_relaxed);
+}
+// CHECK-LABEL: define dso_local void @test_and_set_void(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 monotonic, align 1
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
+// CHECK-NEXT:    store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-NEXT:    ret void
+//
+void test_and_set_void(void *ptr) {
+  __atomic_test_and_set(ptr, memory_order_relaxed);
+}
+// CHECK-LABEL: define dso_local void @test_and_set_incomplete(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 monotonic, align 1
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
+// CHECK-NEXT:    store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-NEXT:    ret void
+//
+void test_and_set_incomplete(struct incomplete *ptr) {
+  __atomic_test_and_set(ptr, memory_order_relaxed);
+}
diff --git a/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp b/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp
index ffbce9ff8d6f4..7dc3b6bd59822 100644
--- a/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp
+++ b/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp
@@ -15,14 +15,14 @@ using i512x3x3 = _BitInt(512) __attribute__((matrix_type(3, 3)));
 // CHECK-NEXT:    [[A:%.*]] = alloca <3 x i8>, align 4
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <3 x i8>, align 4
 // CHECK-NEXT:    store i32 [[A_COERCE]], ptr [[A]], align 4
-// CHECK-NEXT:    [[LOADVEC4:%.*]] = load <4 x i8>, ptr [[A]], align 4
-// CHECK-NEXT:    [[A1:%.*]] = shufflevector <4 x i8> [[LOADVEC4]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
+// CHECK-NEXT:    [[LOADVECN:%.*]] = load <4 x i8>, ptr [[A]], align 4
+// CHECK-NEXT:    [[A1:%.*]] = shufflevector <4 x i8> [[LOADVECN]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
 // CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[A1]], <3 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
 // CHECK-NEXT:    store <4 x i8> [[EXTRACTVEC]], ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[LOADVEC42:%.*]] = load <4 x i8>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[EXTRACTVEC3:%.*]] = shufflevector <4 x i8> [[LOADVEC42]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
-// CHECK-NEXT:    [[LOADVEC44:%.*]] = load <4 x i8>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[EXTRACTVEC5:%.*]] = shufflevector <4 x i8> [[LOADVEC44]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
+// CHECK-NEXT:    [[LOADVECN2:%.*]] = load <4 x i8>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[EXTRACTVEC3:%.*]] = shufflevector <4 x i8> [[LOADVECN2]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
+// CHECK-NEXT:    [[LOADVECN4:%.*]] = load <4 x i8>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[EXTRACTVEC5:%.*]] = shufflevector <4 x i8> [[LOADVECN4]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
 // CHECK-NEXT:    [[ADD:%.*]] = add <3 x i8> [[EXTRACTVEC3]], [[EXTRACTVEC5]]
 // CHECK-NEXT:    store <3 x i8> [[ADD]], ptr [[RETVAL]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[RETVAL]], align 4
@@ -38,10 +38,10 @@ i8x3 v1(i8x3 a) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <3 x i32>, align 16
 // CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x i32> [[A]], <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
 // CHECK-NEXT:    store <4 x i32> [[EXTRACTVEC]], ptr [[A_ADDR]], align 16
-// CHECK-NEXT:    [[LOADVEC4:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
-// CHECK-NEXT:    [[EXTRACTVEC1:%.*]] = shufflevector <4 x i32> [[LOADVEC4]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
-// CHECK-NEXT:    [[LOADVEC42:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
-// CHECK-NEXT:    [[EXTRACTVEC3:%.*]] = shufflevector <4 x i32> [[LOADVEC42]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+// CHECK-NEXT:    [[LOADVECN:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[EXTRACTVEC1:%.*]] = shufflevector <4 x i32> [[LOADVECN]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+// CHECK-NEXT:    [[LOADVECN2:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[EXTRACTVEC3:%.*]] = shufflevector <4 x i32> [[LOADVECN2]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
 // CHECK-NEXT:    [[ADD:%.*]] = add <3 x i32> [[EXTRACTVEC1]], [[EXTRACTVEC3]]
 // CHECK-NEXT:    ret <3 x i32> [[ADD]]
 //
@@ -53,14 +53,14 @@ i32x3 v2(i32x3 a) {
 // CHECK-SAME: ptr noundef byval(<3 x i512>) align 256 [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <3 x i512>, align 256
-// CHECK-NEXT:    [[LOADVEC4:%.*]] = load <4 x i512>, ptr [[TMP0]], align 256
-// CHECK-NEXT:    [[A:%.*]] = shufflevector <4 x i512> [[LOADVEC4]], <4 x i512> poison, <3 x i32> <i32 0, i32 1, i32 2>
+// CHECK-NEXT:    [[LOADVECN:%.*]] = load <4 x i512>, ptr [[TMP0]], align 256
+// CHECK-NEXT:    [[A:%.*]] = shufflevector <4 x i512> [[LOADVECN]], <4 x i512> poison, <3 x i32> <i32 0, i32 1, i32 2>
 // CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x i512> [[A]], <3 x i512> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
 // CHECK-NEXT:    store <4 x i512> [[EXTRACTVEC]], ptr [[A_ADDR]], align 256
-// CHECK-NEXT:    [[LOADVEC41:%.*]] = load <4 x i512>, ptr [[A_ADDR]], align 256
-// CHECK-NEXT:    [[EXTRACTVEC2:%.*]] = shufflevector <4 x i512> [[LOADVEC41]], <4 x i512> poison, <3 x i32> <i32 0, i32 1, i32 2>
-// CHECK-NEXT:    [[LOADVEC43:%.*]] = load <4 x i512>, ptr [[A_ADDR]], align 256
-// CHECK-NEXT:    [[EXTRACTVEC4:%.*]] = shufflevector <4 x i512> [[LOADVEC43]], <4 x i512> poison, <3 x i32> <i32 0, i32 1, i32 2>
+// CHECK-NEXT:    [[LOADVECN1:%.*]] = load <4 x i512>, ptr [[A_ADDR]], align 256
+// CHECK-NEXT:    [[EXTRACTVEC2:%.*]] = shufflevector <4 x i512> [[LOADVECN1]], <4 x i512> poison, <3 x i32> <i32 0, i32 1, i32 2>
+// CHECK-NEXT:    [[LOADVECN3:%.*]] = load <4 x i512>, ptr [[A_ADDR]], align 256
+// CHECK-NEXT:    [[EXTRACTVEC4:%.*]] = shufflevector <4 x i512> [[LOADVECN3]], <4 x i512> poison, <3 x i32> <i32 0, i32 1, i32 2>
 // CHECK-NEXT:    [[ADD:%.*]] = add <3 x i512> [[EXTRACTVEC2]], [[EXTRACTVEC4]]
 // CHECK-NEXT:    ret <3 x i512> [[ADD]]
 //
diff --git a/clang/test/CodeGenCXX/ptrauth-member-function-pointer.cpp b/clang/test/CodeGenCXX/ptrauth-member-function-pointer.cpp
index 0a9ac3fa510f5..e9436f11b5106 100644
--- a/clang/test/CodeGenCXX/ptrauth-member-function-pointer.cpp
+++ b/clang/test/CodeGenCXX/ptrauth-member-function-pointer.cpp
@@ -1,10 +1,12 @@
 // RUN: %clang_cc1 -triple arm64-apple-ios   -fptrauth-calls -fptrauth-intrinsics -emit-llvm -std=c++11 -O1 -disable-llvm-passes -o - %s | FileCheck -check-prefixes=CHECK,NODEBUG,DARWIN %s
+// RUN: %clang_cc1 -triple arm64-apple-ios   -fptrauth-calls -fptrauth-intrinsics -emit-llvm -std=c++17 -O1 -disable-llvm-passes -o - %s | FileCheck -check-prefixes=CHECK,NODEBUG,DARWIN,CXX17 %s
 // RUN: %clang_cc1 -triple arm64-apple-ios   -fptrauth-calls -fptrauth-intrinsics -emit-llvm -std=c++11 -O1 -disable-llvm-passes -debug-info-kind=limited -o - %s | FileCheck -check-prefixes=CHECK,DARWIN %s
 // RUN: %clang_cc1 -triple arm64-apple-ios   -fptrauth-calls -fptrauth-intrinsics -emit-llvm -std=c++11 -O1 -disable-llvm-passes -stack-protector 1 -o - %s | FileCheck %s -check-prefix=STACK-PROT
 // RUN: %clang_cc1 -triple arm64-apple-ios   -fptrauth-calls -fptrauth-intrinsics -emit-llvm -std=c++11 -O1 -disable-llvm-passes -stack-protector 2 -o - %s | FileCheck %s -check-prefix=STACK-PROT
 // RUN: %clang_cc1 -triple arm64-apple-ios   -fptrauth-calls -fptrauth-intrinsics -emit-llvm -std=c++11 -O1 -disable-llvm-passes -stack-protector 3 -o - %s | FileCheck %s -check-prefix=STACK-PROT
 
 // RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-calls -fptrauth-intrinsics -emit-llvm -std=c++11 -O1 -disable-llvm-passes -o - %s | FileCheck -check-prefixes=CHECK,NODEBUG,ELF %s
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-calls -fptrauth-intrinsics -emit-llvm -std=c++17 -O1 -disable-llvm-passes -o - %s | FileCheck -check-prefixes=CHECK,NODEBUG,ELF,CXX17 %s
 // RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-calls -fptrauth-intrinsics -emit-llvm -std=c++11 -O1 -disable-llvm-passes -debug-info-kind=limited -o - %s | FileCheck -check-prefixes=CHECK,ELF %s
 // RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-calls -fptrauth-intrinsics -emit-llvm -std=c++11 -O1 -disable-llvm-passes -stack-protector 1 -o - %s | FileCheck %s -check-prefix=STACK-PROT
 // RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-calls -fptrauth-intrinsics -emit-llvm -std=c++11 -O1 -disable-llvm-passes -stack-protector 2 -o - %s | FileCheck %s -check-prefix=STACK-PROT
@@ -20,6 +22,10 @@
 // CHECK: @__const._Z13testArrayInitv.c0 = private unnamed_addr constant %struct.Class0 { { i64, i64 } { i64 ptrtoint (ptr ptrauth (ptr @_ZN5Base011nonvirtual0Ev, i32 0, i64 35591) to i64), i64 0 } }, align 8
 // CHECK: @__const._Z13testArrayInitv.c1 = private unnamed_addr constant %struct.Class0 { { i64, i64 } { i64 ptrtoint (ptr ptrauth (ptr @_ZN5Base08virtual1Ev_vfpthunk_, i32 0, i64 35591) to i64), i64 0 } }, align 8
 
+// CHECK: @_ZN22testNoexceptConversion6mfptr1E = global { i64, i64 } { i64 ptrtoint (ptr ptrauth (ptr @_ZN22testNoexceptConversion1S19nonvirtual_noexceptEv, i32 0, i64 [[TYPEDISC3:.*]]) to i64), i64 0 },
+// CHECK: @_ZN22testNoexceptConversion6mfptr2E = global { i64, i64 } { i64 ptrtoint (ptr ptrauth (ptr @_ZN22testNoexceptConversion1S16virtual_noexceptEv_vfpthunk_, i32 0, i64 [[TYPEDISC3]]) to i64), i64 0 },
+// CHECK: @_ZN22testNoexceptConversion15mfptr3_noexceptE = global { i64, i64 } { i64 ptrtoint (ptr ptrauth (ptr @_ZN22testNoexceptConversion1S19nonvirtual_noexceptEv, i32 0, i64 [[TYPEDISC3]]) to i64), i64 0 },
+
 // CHECK: @_ZTV5Base0 = unnamed_addr constant { [5 x ptr] } { [5 x ptr] [ptr null, ptr @_ZTI5Base0,
 // CHECK-SAME: ptr ptrauth (ptr @_ZN5Base08virtual1Ev, i32 0, i64 55600, ptr getelementptr inbounds ({ [5 x ptr] }, ptr @_ZTV5Base0, i32 0, i32 0, i32 2)),
 // CHECK-SAME: ptr ptrauth (ptr @_ZN5Base08virtual3Ev, i32 0, i64 53007, ptr getelementptr inbounds ({ [5 x ptr] }, ptr @_ZTV5Base0, i32 0, i32 0, i32 3)),
@@ -77,6 +83,9 @@ struct Derived1 : Base0, Base1 {
 };
 
 typedef void (Base0::*MethodTy0)();
+#if __cplusplus >= 201703L
+typedef void (Base0::*NoExceptMethodTy0)() noexcept;
+#endif
 typedef void (Base0::*VariadicMethodTy0)(int, ...);
 typedef void (Derived0::*MethodTy1)();
 
@@ -293,6 +302,16 @@ void test1(Base0 *a0, MethodTy0 a1) {
   (a0->*a1)();
 }
 
+// CXX17: define{{.*}} void @_Z14test1_noexceptP5Base0MS_DoFvvE(
+// CXX17: %[[V14:.*]] = phi ptr [ %{{.*}}, {{.*}} ], [ %{{.*}}, {{.*}} ]
+// CXX17: %[[V15:.*]] = phi i64 [ 0, {{.*}} ], [ [[TYPEDISC0]], {{.*}} ]
+// CXX17: call void %[[V14]](ptr noundef nonnull align {{[0-9]+}} dereferenceable(8) %{{.*}}) {{.*}}[ "ptrauth"(i32 0, i64 %[[V15]]) ]
+#if __cplusplus >= 201703L
+void test1_noexcept(Base0 *a0, NoExceptMethodTy0 a1) {
+  (a0->*a1)();
+}
+#endif
+
 // CHECK: define{{.*}} void @_Z15testConversion0M5Base0FvvEM8Derived0FvvE([2 x i64] %[[METHOD0_COERCE:.*]], [2 x i64] %[[METHOD1_COERCE:.*]])
 // CHECK: %[[METHOD0:.*]] = alloca { i64, i64 }, align 8
 // CHECK: %[[METHOD1:.*]] = alloca { i64, i64 }, align 8
@@ -438,3 +457,47 @@ void testArrayInit() {
 void testConvertNull() {
   VariadicMethodTy0 t = (VariadicMethodTy0)(MethodTy0{});
 }
+
+namespace testNoexceptConversion {
+
+// CHECK-LABEL: define internal void @__cxx_global_var_init()
+// CHECK: %[[V0:.*]] = load { i64, i64 }, ptr @_ZN22testNoexceptConversion15mfptr0_noexceptE, align 8
+// CHECK: store { i64, i64 } %[[V0]], ptr @_ZN22testNoexceptConversion6mfptr4E, align 8
+
+// CHECK: define {{.*}}void @_ZN22testNoexceptConversion5test0Ev()
+// CHECK: %[[P0:.*]] = alloca { i64, i64 }, align 8
+// CHECK: store { i64, i64 } { i64 ptrtoint (ptr ptrauth (ptr @_ZN22testNoexceptConversion1S19nonvirtual_noexceptEv, i32 0, i64 [[TYPEDISC3]]) to i64), i64 0 }, ptr %[[P0]], align 8,
+
+// CHECK: define {{.*}}void @_ZN22testNoexceptConversion5test1Ev()
+// CHECK: %[[P0:.*]] = alloca { i64, i64 }, align 8
+// CHECK: store { i64, i64 } { i64 ptrtoint (ptr ptrauth (ptr @_ZN22testNoexceptConversion1S16virtual_noexceptEv_vfpthunk_, i32 0, i64 [[TYPEDISC3]]) to i64), i64 0 }, ptr %[[P0]], align 8,
+
+// CHECK: define {{.*}}void @_ZN22testNoexceptConversion5test2Ev()
+// CHECK: %[[P0:.*]] = alloca { i64, i64 }, align 8
+// CHECK: %[[V0:.*]] = load { i64, i64 }, ptr @_ZN22testNoexceptConversion15mfptr0_noexceptE, align 8
+// CHECK: store { i64, i64 } %[[V0]], ptr %[[P0]], align 8,
+
+struct S {
+  void nonvirtual_noexcept() noexcept;
+  virtual void virtual_noexcept() noexcept;
+};
+
+void (S::*mfptr0_noexcept)() noexcept;
+void (S::*mfptr1)() = &S::nonvirtual_noexcept;
+void (S::*mfptr2)() = &S::virtual_noexcept;
+void (S::*mfptr3_noexcept)() noexcept = &S::nonvirtual_noexcept;
+void (S::*mfptr4)() = mfptr0_noexcept;
+
+void test0() {
+  void (S::*p0)() = &S::nonvirtual_noexcept;
+}
+
+void test1() {
+  void (S::*p0)() = &S::virtual_noexcept;
+}
+
+void test2() {
+  void (S::*p0)() = mfptr0_noexcept;
+}
+
+}
diff --git a/clang/test/CodeGenOpenCL/amdgpu-alignment.cl b/clang/test/CodeGenOpenCL/amdgpu-alignment.cl
index 8f57713fe1f04..3c2653bf34124 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-alignment.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-alignment.cl
@@ -106,7 +106,7 @@ typedef double __attribute__((ext_vector_type(16))) double16;
 // CHECK: store volatile <16 x i16> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v16i16, align 32
 // CHECK: store volatile i32 0, ptr addrspace(3) @local_memory_alignment_global.lds_i32, align 4
 // CHECK: store volatile <2 x i32> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v2i32, align 8
-// CHECK: store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, ptr addrspace(3) @local_memory_alignment_global.lds_v3i32, align 16
+// CHECK: store volatile <3 x i32> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v3i32, align 16
 // CHECK: store volatile <4 x i32> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v4i32, align 16
 // CHECK: store volatile <8 x i32> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v8i32, align 32
 // CHECK: store volatile <16 x i32> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v16i32, align 64
@@ -124,7 +124,7 @@ typedef double __attribute__((ext_vector_type(16))) double16;
 // CHECK: store volatile <16 x half> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v16f16, align 32
 // CHECK: store volatile float 0.000000e+00, ptr addrspace(3) @local_memory_alignment_global.lds_f32, align 4
 // CHECK: store volatile <2 x float> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v2f32, align 8
-// CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, ptr addrspace(3) @local_memory_alignment_global.lds_v3f32, align 16
+// CHECK: store volatile <3 x float> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v3f32, align 16
 // CHECK: store volatile <4 x float> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v4f32, align 16
 // CHECK: store volatile <8 x float> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v8f32, align 32
 // CHECK: store volatile <16 x float> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v16f32, align 64
@@ -393,7 +393,7 @@ kernel void local_memory_alignment_arg(
 // CHECK: store volatile <16 x i16> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32
 // CHECK: store volatile i32 0, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 4
 // CHECK: store volatile <2 x i32> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8
-// CHECK: store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16
+// CHECK: store volatile <3 x i32> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16
 // CHECK: store volatile <4 x i32> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16
 // CHECK: store volatile <8 x i32> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32
 // CHECK: store volatile <16 x i32> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 64
@@ -411,7 +411,7 @@ kernel void local_memory_alignment_arg(
 // CHECK: store volatile <16 x half> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32
 // CHECK: store volatile float 0.000000e+00, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 4
 // CHECK: store volatile <2 x float> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8
-// CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16
+// CHECK: store volatile <3 x float> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16
 // CHECK: store volatile <4 x float> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16
 // CHECK: store volatile <8 x float> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32
 // CHECK: store volatile <16 x float> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 64
diff --git a/clang/test/CodeGenOpenCL/preserve_vec3.cl b/clang/test/CodeGenOpenCL/preserve_vec3.cl
index c84effe0c4b6e..747cc301feff6 100644
--- a/clang/test/CodeGenOpenCL/preserve_vec3.cl
+++ b/clang/test/CodeGenOpenCL/preserve_vec3.cl
@@ -1,5 +1,5 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown -fpreserve-vec3-type | FileCheck %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s
 
 typedef char char3 __attribute__((ext_vector_type(3)));
 typedef char char8 __attribute__((ext_vector_type(8)));
@@ -9,10 +9,11 @@ typedef float float3 __attribute__((ext_vector_type(3)));
 typedef float float4 __attribute__((ext_vector_type(4)));
 
 // CHECK-LABEL: define dso_local spir_kernel void @foo(
-// CHECK-SAME: ptr addrspace(1) nocapture noundef readonly align 16 [[A:%.*]], ptr addrspace(1) nocapture noundef writeonly align 16 initializes((0, 12)) [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META6:![0-9]+]] !kernel_arg_type_qual [[META7:![0-9]+]] {
+// CHECK-SAME: ptr addrspace(1) nocapture noundef readonly align 16 [[A:%.*]], ptr addrspace(1) nocapture noundef writeonly align 16 initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META6:![0-9]+]] !kernel_arg_type_qual [[META7:![0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], align 16, !tbaa [[TBAA8:![0-9]+]]
-// CHECK-NEXT:    store <3 x float> [[TMP0]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
+// CHECK-NEXT:    [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
+// CHECK-NEXT:    [[EXTRACTVEC1:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    store <4 x float> [[EXTRACTVEC1]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8:![0-9]+]]
 // CHECK-NEXT:    ret void
 //
 void kernel foo(global float3 *a, global float3 *b) {
@@ -20,11 +21,11 @@ void kernel foo(global float3 *a, global float3 *b) {
 }
 
 // CHECK-LABEL: define dso_local spir_kernel void @float4_to_float3(
-// CHECK-SAME: ptr addrspace(1) nocapture noundef writeonly align 16 initializes((0, 12)) [[A:%.*]], ptr addrspace(1) nocapture noundef readonly align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META11:![0-9]+]] !kernel_arg_base_type [[META12:![0-9]+]] !kernel_arg_type_qual [[META7]] {
+// CHECK-SAME: ptr addrspace(1) nocapture noundef writeonly align 16 initializes((0, 16)) [[A:%.*]], ptr addrspace(1) nocapture noundef readonly align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META11:![0-9]+]] !kernel_arg_base_type [[META12:![0-9]+]] !kernel_arg_type_qual [[META7]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
-// CHECK-NEXT:    [[ASTYPE:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
-// CHECK-NEXT:    store <3 x float> [[ASTYPE]], ptr addrspace(1) [[A]], align 16, !tbaa [[TBAA8]]
+// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    store <4 x float> [[EXTRACTVEC]], ptr addrspace(1) [[A]], align 16, !tbaa [[TBAA8]]
 // CHECK-NEXT:    ret void
 //
 void kernel float4_to_float3(global float3 *a, global float4 *b) {
@@ -34,8 +35,8 @@ void kernel float4_to_float3(global float3 *a, global float4 *b) {
 // CHECK-LABEL: define dso_local spir_kernel void @float3_to_float4(
 // CHECK-SAME: ptr addrspace(1) nocapture noundef readonly align 16 [[A:%.*]], ptr addrspace(1) nocapture noundef writeonly align 16 initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META11]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META7]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], align 16, !tbaa [[TBAA8]]
-// CHECK-NEXT:    [[ASTYPE:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
+// CHECK-NEXT:    [[ASTYPE:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
 // CHECK-NEXT:    store <4 x float> [[ASTYPE]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
 // CHECK-NEXT:    ret void
 //
@@ -46,9 +47,9 @@ void kernel float3_to_float4(global float3 *a, global float4 *b) {
 // CHECK-LABEL: define dso_local spir_kernel void @float3_to_double2(
 // CHECK-SAME: ptr addrspace(1) nocapture noundef readonly align 16 [[A:%.*]], ptr addrspace(1) nocapture noundef writeonly align 16 initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META14:![0-9]+]] !kernel_arg_type_qual [[META7]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], align 16, !tbaa [[TBAA8]]
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
-// CHECK-NEXT:    store <4 x float> [[TMP1]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
+// CHECK-NEXT:    [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    store <4 x float> [[TMP0]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
 // CHECK-NEXT:    ret void
 //
 void kernel float3_to_double2(global float3 *a, global double2 *b) {
@@ -56,11 +57,11 @@ void kernel float3_to_double2(global float3 *a, global double2 *b) {
 }
 
 // CHECK-LABEL: define dso_local spir_kernel void @char8_to_short3(
-// CHECK-SAME: ptr addrspace(1) nocapture noundef writeonly align 8 initializes((0, 6)) [[A:%.*]], ptr addrspace(1) nocapture noundef readonly align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META16:![0-9]+]] !kernel_arg_type_qual [[META7]] {
+// CHECK-SAME: ptr addrspace(1) nocapture noundef writeonly align 8 initializes((0, 8)) [[A:%.*]], ptr addrspace(1) nocapture noundef readonly align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META16:![0-9]+]] !kernel_arg_type_qual [[META7]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr addrspace(1) [[B]], align 8, !tbaa [[TBAA8]]
-// CHECK-NEXT:    [[ASTYPE:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
-// CHECK-NEXT:    store <3 x i16> [[ASTYPE]], ptr addrspace(1) [[A]], align 8, !tbaa [[TBAA8]]
+// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[A]], align 8, !tbaa [[TBAA8]]
 // CHECK-NEXT:    ret void
 //
 void kernel char8_to_short3(global short3 *a, global char8 *b) {
@@ -70,8 +71,8 @@ void kernel char8_to_short3(global short3 *a, global char8 *b) {
 // CHECK-LABEL: define dso_local spir_func void @from_char3(
 // CHECK-SAME: <3 x i8> noundef [[A:%.*]], ptr addrspace(1) nocapture noundef writeonly initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x i8> [[A]], <3 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
-// CHECK-NEXT:    store <4 x i8> [[TMP0]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[TBAA17:![0-9]+]]
+// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[A]], <3 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    store <4 x i8> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[TBAA17:![0-9]+]]
 // CHECK-NEXT:    ret void
 //
 void from_char3(char3 a, global int *out) {
@@ -81,8 +82,8 @@ void from_char3(char3 a, global int *out) {
 // CHECK-LABEL: define dso_local spir_func void @from_short3(
 // CHECK-SAME: <3 x i16> noundef [[A:%.*]], ptr addrspace(1) nocapture noundef writeonly initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x i16> [[A]], <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
-// CHECK-NEXT:    store <4 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[TBAA19:![0-9]+]]
+// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x i16> [[A]], <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[TBAA19:![0-9]+]]
 // CHECK-NEXT:    ret void
 //
 void from_short3(short3 a, global long *out) {
@@ -90,11 +91,11 @@ void from_short3(short3 a, global long *out) {
 }
 
 // CHECK-LABEL: define dso_local spir_func void @scalar_to_char3(
-// CHECK-SAME: i32 noundef [[A:%.*]], ptr addrspace(1) nocapture noundef writeonly initializes((0, 3)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// CHECK-SAME: i32 noundef [[A:%.*]], ptr addrspace(1) nocapture noundef writeonly initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32 [[A]] to <4 x i8>
-// CHECK-NEXT:    [[ASTYPE:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
-// CHECK-NEXT:    store <3 x i8> [[ASTYPE]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[TBAA8]]
+// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    store <4 x i8> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[TBAA8]]
 // CHECK-NEXT:    ret void
 //
 void scalar_to_char3(int a, global char3 *out) {
@@ -102,11 +103,11 @@ void scalar_to_char3(int a, global char3 *out) {
 }
 
 // CHECK-LABEL: define dso_local spir_func void @scalar_to_short3(
-// CHECK-SAME: i64 noundef [[A:%.*]], ptr addrspace(1) nocapture noundef writeonly initializes((0, 6)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// CHECK-SAME: i64 noundef [[A:%.*]], ptr addrspace(1) nocapture noundef writeonly initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64 [[A]] to <4 x i16>
-// CHECK-NEXT:    [[ASTYPE:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
-// CHECK-NEXT:    store <3 x i16> [[ASTYPE]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[TBAA8]]
+// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[TBAA8]]
 // CHECK-NEXT:    ret void
 //
 void scalar_to_short3(long a, global short3 *out) {
diff --git a/clang/test/Driver/darwin-version.c b/clang/test/Driver/darwin-version.c
index ff05d4c10c487..a00bd832dc802 100644
--- a/clang/test/Driver/darwin-version.c
+++ b/clang/test/Driver/darwin-version.c
@@ -1,6 +1,6 @@
 // RUN: %clang -target armv6-apple-darwin9 -c %s -### 2>&1 | \
-// RUN:   FileCheck --check-prefix=CHECK-VERSION-OSX %s
-// CHECK-VERSION-OSX: "armv6k-apple-macosx10.5.0"
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-IOS %s
+// CHECK-VERSION-IOS: "armv6k-apple-ios5.0.0"
 // RUN: %clang -target armv6-apple-darwin9 -miphoneos-version-min=2.0 -c %s -### 2>&1 | \
 // RUN:   FileCheck --check-prefix=CHECK-VERSION-IOS2 %s
 // CHECK-VERSION-IOS2: "armv6k-apple-ios2.0.0"
diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-ampere1b.c b/clang/test/Driver/print-enabled-extensions/aarch64-ampere1b.c
index d9bee7093bee2..444ac4526200f 100644
--- a/clang/test/Driver/print-enabled-extensions/aarch64-ampere1b.c
+++ b/clang/test/Driver/print-enabled-extensions/aarch64-ampere1b.c
@@ -51,7 +51,6 @@
 // CHECK-NEXT:     FEAT_SHA3, FEAT_SHA512                                 Enable SHA512 and SHA3 support
 // CHECK-NEXT:     FEAT_SM4, FEAT_SM3                                     Enable SM3 and SM4 support
 // CHECK-NEXT:     FEAT_SPECRES                                           Enable Armv8.5-A execution and data prediction invalidation instructions
-// CHECK-NEXT:     FEAT_SPEv1p2                                           Enable extra register in the Statistical Profiling Extension
 // CHECK-NEXT:     FEAT_SSBS, FEAT_SSBS2                                  Enable Speculative Store Bypass Safe bit
 // CHECK-NEXT:     FEAT_TLBIOS, FEAT_TLBIRANGE                            Enable Armv8.4-A TLB Range and Maintenance instructions
 // CHECK-NEXT:     FEAT_TRF                                               Enable Armv8.4-A Trace extension
diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a520.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a520.c
index b906074ce7659..6ddd52a4a7089 100644
--- a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a520.c
+++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a520.c
@@ -46,7 +46,6 @@
 // CHECK-NEXT:     FEAT_SB                                                Enable Armv8.5-A Speculation Barrier
 // CHECK-NEXT:     FEAT_SEL2                                              Enable Armv8.4-A Secure Exception Level 2 extension
 // CHECK-NEXT:     FEAT_SPECRES                                           Enable Armv8.5-A execution and data prediction invalidation instructions
-// CHECK-NEXT:     FEAT_SPEv1p2                                           Enable extra register in the Statistical Profiling Extension
 // CHECK-NEXT:     FEAT_SSBS, FEAT_SSBS2                                  Enable Speculative Store Bypass Safe bit
 // CHECK-NEXT:     FEAT_SVE                                               Enable Scalable Vector Extension (SVE) instructions
 // CHECK-NEXT:     FEAT_SVE2                                              Enable Scalable Vector Extension 2 (SVE2) instructions
diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a520ae.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a520ae.c
index 2e147732d5c68..35399a3c85c62 100644
--- a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a520ae.c
+++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a520ae.c
@@ -46,7 +46,6 @@
 // CHECK-NEXT:     FEAT_SB                                                Enable Armv8.5-A Speculation Barrier
 // CHECK-NEXT:     FEAT_SEL2                                              Enable Armv8.4-A Secure Exception Level 2 extension
 // CHECK-NEXT:     FEAT_SPECRES                                           Enable Armv8.5-A execution and data prediction invalidation instructions
-// CHECK-NEXT:     FEAT_SPEv1p2                                           Enable extra register in the Statistical Profiling Extension
 // CHECK-NEXT:     FEAT_SSBS, FEAT_SSBS2                                  Enable Speculative Store Bypass Safe bit
 // CHECK-NEXT:     FEAT_SVE                                               Enable Scalable Vector Extension (SVE) instructions
 // CHECK-NEXT:     FEAT_SVE2                                              Enable Scalable Vector Extension 2 (SVE2) instructions
diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-fujitsu-monaka.c b/clang/test/Driver/print-enabled-extensions/aarch64-fujitsu-monaka.c
index 01a97a00de542..a80d0f5c79ec1 100644
--- a/clang/test/Driver/print-enabled-extensions/aarch64-fujitsu-monaka.c
+++ b/clang/test/Driver/print-enabled-extensions/aarch64-fujitsu-monaka.c
@@ -63,7 +63,6 @@
 // CHECK-NEXT:     FEAT_SM4, FEAT_SM3                                     Enable SM3 and SM4 support
 // CHECK-NEXT:     FEAT_SPECRES                                           Enable Armv8.5-A execution and data prediction invalidation instructions
 // CHECK-NEXT:     FEAT_SPECRES2                                          Enable Speculation Restriction Instruction
-// CHECK-NEXT:     FEAT_SPEv1p2                                           Enable extra register in the Statistical Profiling Extension
 // CHECK-NEXT:     FEAT_SSBS, FEAT_SSBS2                                  Enable Speculative Store Bypass Safe bit
 // CHECK-NEXT:     FEAT_SVE                                               Enable Scalable Vector Extension (SVE) instructions
 // CHECK-NEXT:     FEAT_SVE2                                              Enable Scalable Vector Extension 2 (SVE2) instructions
@@ -77,4 +76,4 @@
 // CHECK-NEXT:     FEAT_UAO                                               Enable Armv8.2-A UAO PState
 // CHECK-NEXT:     FEAT_VHE                                               Enable Armv8.1-A Virtual Host extension
 // CHECK-NEXT:     FEAT_WFxT                                              Enable Armv8.7-A WFET and WFIT instruction
-// CHECK-NEXT:     FEAT_XS                                                Enable Armv8.7-A limited-TLB-maintenance instruction
\ No newline at end of file
+// CHECK-NEXT:     FEAT_XS                                                Enable Armv8.7-A limited-TLB-maintenance instruction
diff --git a/clang/test/Driver/ps4-sdk-root.c b/clang/test/Driver/ps4-sdk-root.c
index 78eb1ce2ba630..6e5f1e28958ad 100644
--- a/clang/test/Driver/ps4-sdk-root.c
+++ b/clang/test/Driver/ps4-sdk-root.c
@@ -1,10 +1,10 @@
 /// PS4 clang emits warnings when SDK headers (<SDKROOT>/target/include/) or
-/// libraries (<SDKROOT>/target/lib/) are missing, unless the user takes control
-/// of search paths, when corresponding existence checks are skipped.
+/// libraries (<SDKROOT>/target/lib/) are missing. If the user takes control
+/// of header search paths, the existence check for <SDKROOT>/target/include is
+/// skipped.
 ///
 /// User control of header search is assumed if `--sysroot`, `-isysroot`,
-/// `-nostdinc` or `-nostdlibinc` is supplied. User control of library search
-/// is assumed if `--sysroot` is supplied.
+/// `-nostdinc` or `-nostdlibinc` is supplied.
 ///
 /// Warnings are emitted if a specified `-isysroot` or `--sysroot` does not
 /// exist.
@@ -46,22 +46,23 @@
 // RUN: env SCE_ORBIS_SDK_DIR=.. %clang @%t.rsp %s -c -isysroot . 2>&1 | FileCheck -check-prefixes=NO-WARN %s
 // RUN: env SCE_ORBIS_SDK_DIR=.. %clang @%t.rsp %s -c --sysroot=. 2>&1 | FileCheck -check-prefixes=NO-WARN %s
 
-/// --sysroot disables the existence check for libraries and headers.
-// RUN: env SCE_ORBIS_SDK_DIR=.. %clang @%t.rsp %s --sysroot=. 2>&1 | FileCheck -check-prefix=NO-WARN %s
+/// --sysroot disables the existence check for headers. The check for libraries
+/// remains.
+// RUN: env SCE_ORBIS_SDK_DIR=.. %clang @%t.rsp %s --sysroot=. 2>&1 | FileCheck -check-prefixes=WARN-SYS-LIBS,NO-WARN %s
 
 /// -isysroot overrides --sysroot for header search, but not library search.
-// RUN: env SCE_ORBIS_SDK_DIR=.. %clang @%t.rsp %s -isysroot . --sysroot=.. 2>&1 | FileCheck -check-prefixes=ISYSTEM,NO-WARN %s
-// RUN: env SCE_ORBIS_SDK_DIR=.. %clang @%t.rsp %s --sysroot=.. -isysroot . 2>&1 | FileCheck -check-prefixes=ISYSTEM,NO-WARN %s
+// RUN: env SCE_ORBIS_SDK_DIR=.. %clang @%t.rsp %s -isysroot . --sysroot=%t.inconly 2>&1 | FileCheck -check-prefixes=ISYSTEM,WARN-SYS-LIBS,NO-WARN %s
+// RUN: env SCE_ORBIS_SDK_DIR=.. %clang @%t.rsp %s --sysroot=%t.inconly -isysroot . 2>&1 | FileCheck -check-prefixes=ISYSTEM,WARN-SYS-LIBS,NO-WARN %s
 
 /// Warnings are emitted if non-existent --sysroot/-isysroot are supplied.
-// RUN: env SCE_ORBIS_SDK_DIR=.. %clang @%t.rsp %s --sysroot=foo -isysroot . 2>&1 | FileCheck -check-prefixes=WARN-SYSROOT,NO-WARN %s
-// RUN: env SCE_ORBIS_SDK_DIR=.. %clang @%t.rsp %s -isysroot foo --sysroot=. 2>&1 | FileCheck -check-prefixes=WARN-SYSROOT,NO-WARN %s
-// RUN: env SCE_ORBIS_SDK_DIR=.. %clang @%t.rsp %s --sysroot=foo -isysroot bar 2>&1 | FileCheck -check-prefixes=WARN-SYSROOT,WARN-SYSROOT2,NO-WARN %s
+// RUN: env SCE_ORBIS_SDK_DIR=.. %clang @%t.rsp %s --sysroot=foo -isysroot %t.both 2>&1 | FileCheck -check-prefixes=WARN-SYSROOT,WARN-SYS-LIBS,NO-WARN %s
+// RUN: env SCE_ORBIS_SDK_DIR=.. %clang @%t.rsp %s -isysroot foo --sysroot=%t.both 2>&1 | FileCheck -check-prefixes=WARN-SYSROOT,NO-WARN %s
+// RUN: env SCE_ORBIS_SDK_DIR=.. %clang @%t.rsp %s --sysroot=foo -isysroot bar 2>&1 | FileCheck -check-prefixes=WARN-SYSROOT,WARN-SYSROOT2,WARN-SYS-LIBS,NO-WARN %s
 
 // NO-WARN-NOT: {{warning:|error:}}
-// WARN-SYS-LIBS: warning: unable to find PS4 system libraries directory
-// WARN-SYS-HEADERS: warning: unable to find PS4 system headers directory
 // WARN-SYSROOT: warning: no such sysroot directory: 'foo'
 // WARN-SYSROOT2: warning: no such sysroot directory: 'bar'
+// WARN-SYS-LIBS: warning: unable to find PS4 system libraries directory
+// WARN-SYS-HEADERS: warning: unable to find PS4 system headers directory
 // NO-WARN-NOT: {{warning:|error:}}
 // ISYSTEM: "-cc1"{{.*}}"-internal-externc-isystem" "./target/include"
diff --git a/clang/test/Driver/ps5-linker.c b/clang/test/Driver/ps5-linker.c
index 53f89a914f4fa..9dd35c40619b6 100644
--- a/clang/test/Driver/ps5-linker.c
+++ b/clang/test/Driver/ps5-linker.c
@@ -174,25 +174,26 @@
 
 // Test implicit library search paths are supplied to the linker, after any
 // search paths specified by the user. <sdk-root>/target/lib is implicitly
-// added if it exists and no --sysroot is specified. CRT objects are found
-// there. "." is always implicitly added to library search paths. This is
-// long-standing behavior, unique to PlayStation toolchains.
+// added if it exists. CRT objects are found there. "." is always implicitly
+// added to library search paths. This is long-standing behavior, unique to
+// PlayStation toolchains.
 
 // RUN: rm -rf %t.dir && mkdir %t.dir
 // RUN: env SCE_PROSPERO_SDK_DIR=%t.dir %clang --target=x64_64-sie-ps5 %s -### -Luser 2>&1 | FileCheck --check-prefixes=CHECK-NO-TARGETLIB %s
-// RUN: env SCE_PROSPERO_SDK_DIR=%t.dir %clang --target=x64_64-sie-ps5 %s -### -Luser --sysroot=%t.dir 2>&1 | FileCheck --check-prefixes=CHECK-NO-TARGETLIB %s
+// RUN: %clang --target=x64_64-sie-ps5 %s -### -Luser --sysroot=%t.dir 2>&1 | FileCheck --check-prefixes=CHECK-NO-TARGETLIB %s
 
 // CHECK-NO-TARGETLIB: {{ld(\.exe)?}}"
 // CHECK-NO-TARGETLIB-SAME: "-Luser"
 // CHECK-NO-TARGETLIB-NOT: "-L{{.*[/\\]}}target/lib"
 // CHECK-NO-TARGETLIB-SAME: "-L."
 
-// RUN: mkdir -p %t.dir/target/lib
-// RUN: touch %t.dir/target/lib/crti.o
-// RUN: env SCE_PROSPERO_SDK_DIR=%t.dir %clang --target=x64_64-sie-ps5 %s -### -Luser 2>&1 | FileCheck --check-prefixes=CHECK-TARGETLIB %s
+// RUN: mkdir -p %t.dir/myroot/target/lib
+// RUN: touch %t.dir/myroot/target/lib/crti.o
+// RUN: env SCE_PROSPERO_SDK_DIR=%t.dir/myroot %clang --target=x64_64-sie-ps5 %s -### -Luser 2>&1 | FileCheck --check-prefixes=CHECK-TARGETLIB %s
+// RUN: %clang --target=x64_64-sie-ps5 %s -### -Luser --sysroot=%t.dir/myroot 2>&1 | FileCheck --check-prefixes=CHECK-TARGETLIB %s
 
 // CHECK-TARGETLIB: {{ld(\.exe)?}}"
 // CHECK-TARGETLIB-SAME: "-Luser"
-// CHECK-TARGETLIB-SAME: "-L{{.*[/\\]}}target/lib"
+// CHECK-TARGETLIB-SAME: "-L{{.*}}myroot{{/|\\\\}}target{{/|\\\\}}lib"
 // CHECK-TARGETLIB-SAME: "-L."
-// CHECK-TARGETLIB-SAME: "{{.*[/\\]}}target{{/|\\\\}}lib{{/|\\\\}}crti.o"
+// CHECK-TARGETLIB-SAME: "{{.*}}myroot{{/|\\\\}}target{{/|\\\\}}lib{{/|\\\\}}crti.o"
diff --git a/clang/test/Driver/ps5-sdk-root.c b/clang/test/Driver/ps5-sdk-root.c
index a12e0dfffeb58..16ef2cc01f5e7 100644
--- a/clang/test/Driver/ps5-sdk-root.c
+++ b/clang/test/Driver/ps5-sdk-root.c
@@ -1,12 +1,12 @@
 /// (Essentially identical to ps4-sdk-root.c except for the target.)
 
 /// PS5 clang emits warnings when SDK headers (<SDKROOT>/target/include/) or
-/// libraries (<SDKROOT>/target/lib/) are missing, unless the user takes control
-/// of search paths, when corresponding existence checks are skipped.
+/// libraries (<SDKROOT>/target/lib/) are missing. If the user takes control
+/// of header search paths, the existence check for <SDKROOT>/target/include is
+/// skipped.
 ///
 /// User control of header search is assumed if `--sysroot`, `-isysroot`,
-/// `-nostdinc` or `-nostdlibinc` is supplied. User control of library search
-/// is assumed if `--sysroot` is supplied.
+/// `-nostdinc` or `-nostdlibinc` is supplied.
 ///
 /// Warnings are emitted if a specified `-isysroot` or `--sysroot` does not
 /// exist.
@@ -48,22 +48,23 @@
 // RUN: env SCE_PROSPERO_SDK_DIR=.. %clang @%t.rsp %s -c -isysroot . 2>&1 | FileCheck -check-prefixes=NO-WARN %s
 // RUN: env SCE_PROSPERO_SDK_DIR=.. %clang @%t.rsp %s -c --sysroot=. 2>&1 | FileCheck -check-prefixes=NO-WARN %s
 
-/// --sysroot disables the existence check for libraries and headers.
-// RUN: env SCE_PROSPERO_SDK_DIR=.. %clang @%t.rsp %s --sysroot=. 2>&1 | FileCheck -check-prefix=NO-WARN %s
+/// --sysroot disables the existence check for headers. The check for libraries
+/// remains.
+// RUN: env SCE_PROSPERO_SDK_DIR=.. %clang @%t.rsp %s --sysroot=. 2>&1 | FileCheck -check-prefixes=WARN-SYS-LIBS,NO-WARN %s
 
 /// -isysroot overrides --sysroot for header search, but not library search.
-// RUN: env SCE_PROSPERO_SDK_DIR=.. %clang @%t.rsp %s -isysroot . --sysroot=.. 2>&1 | FileCheck -check-prefixes=ISYSTEM,NO-WARN %s
-// RUN: env SCE_PROSPERO_SDK_DIR=.. %clang @%t.rsp %s --sysroot=.. -isysroot . 2>&1 | FileCheck -check-prefixes=ISYSTEM,NO-WARN %s
+// RUN: env SCE_PROSPERO_SDK_DIR=.. %clang @%t.rsp %s -isysroot . --sysroot=%t.inconly 2>&1 | FileCheck -check-prefixes=ISYSTEM,WARN-SYS-LIBS,NO-WARN %s
+// RUN: env SCE_PROSPERO_SDK_DIR=.. %clang @%t.rsp %s --sysroot=%t.inconly -isysroot . 2>&1 | FileCheck -check-prefixes=ISYSTEM,WARN-SYS-LIBS,NO-WARN %s
 
 /// Warnings are emitted if non-existent --sysroot/-isysroot are supplied.
-// RUN: env SCE_PROSPERO_SDK_DIR=.. %clang @%t.rsp %s --sysroot=foo -isysroot . 2>&1 | FileCheck -check-prefixes=WARN-SYSROOT,NO-WARN %s
-// RUN: env SCE_PROSPERO_SDK_DIR=.. %clang @%t.rsp %s -isysroot foo --sysroot=. 2>&1 | FileCheck -check-prefixes=WARN-SYSROOT,NO-WARN %s
-// RUN: env SCE_PROSPERO_SDK_DIR=.. %clang @%t.rsp %s --sysroot=foo -isysroot bar 2>&1 | FileCheck -check-prefixes=WARN-SYSROOT,WARN-SYSROOT2,NO-WARN %s
+// RUN: env SCE_PROSPERO_SDK_DIR=.. %clang @%t.rsp %s --sysroot=foo -isysroot %t.both 2>&1 | FileCheck -check-prefixes=WARN-SYSROOT,WARN-SYS-LIBS,NO-WARN %s
+// RUN: env SCE_PROSPERO_SDK_DIR=.. %clang @%t.rsp %s -isysroot foo --sysroot=%t.both 2>&1 | FileCheck -check-prefixes=WARN-SYSROOT,NO-WARN %s
+// RUN: env SCE_PROSPERO_SDK_DIR=.. %clang @%t.rsp %s --sysroot=foo -isysroot bar 2>&1 | FileCheck -check-prefixes=WARN-SYSROOT,WARN-SYSROOT2,WARN-SYS-LIBS,NO-WARN %s
 
 // NO-WARN-NOT: {{warning:|error:}}
-// WARN-SYS-LIBS: warning: unable to find PS5 system libraries directory
-// WARN-SYS-HEADERS: warning: unable to find PS5 system headers directory
 // WARN-SYSROOT: warning: no such sysroot directory: 'foo'
 // WARN-SYSROOT2: warning: no such sysroot directory: 'bar'
+// WARN-SYS-LIBS: warning: unable to find PS5 system libraries directory
+// WARN-SYS-HEADERS: warning: unable to find PS5 system headers directory
 // NO-WARN-NOT: {{warning:|error:}}
 // ISYSTEM: "-cc1"{{.*}}"-internal-externc-isystem" "./target/include"
diff --git a/clang/test/Misc/target-invalid-cpu-note/nvptx.c b/clang/test/Misc/target-invalid-cpu-note/nvptx.c
index 3ea6c02d6b384..3afcdf8c9fe5c 100644
--- a/clang/test/Misc/target-invalid-cpu-note/nvptx.c
+++ b/clang/test/Misc/target-invalid-cpu-note/nvptx.c
@@ -27,6 +27,7 @@
 // CHECK-SAME: {{^}}, sm_90
 // CHECK-SAME: {{^}}, sm_90a
 // CHECK-SAME: {{^}}, sm_100
+// CHECK-SAME: {{^}}, sm_100a
 // CHECK-SAME: {{^}}, gfx600
 // CHECK-SAME: {{^}}, gfx601
 // CHECK-SAME: {{^}}, gfx602
diff --git a/clang/test/Modules/module-local-hidden-friend.cppm b/clang/test/Modules/module-local-hidden-friend.cppm
new file mode 100644
index 0000000000000..6ad0f5be51db1
--- /dev/null
+++ b/clang/test/Modules/module-local-hidden-friend.cppm
@@ -0,0 +1,88 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: cd %t
+//
+// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-module-interface -o %t/a.pcm
+// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-module-interface -o %t/b.pcm \
+// RUN:     -fmodule-file=a=%t/a.pcm
+// RUN: %clang_cc1 -std=c++20 %t/use.cc -fmodule-file=a=%t/a.pcm -fmodule-file=b=%t/b.pcm \
+// RUN:     -fsyntax-only -verify
+//
+// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm
+// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-reduced-module-interface -o %t/b.pcm \
+// RUN:     -fmodule-file=a=%t/a.pcm
+// RUN: %clang_cc1 -std=c++20 %t/use.cc -fmodule-file=a=%t/a.pcm -fmodule-file=b=%t/b.pcm \
+// RUN:     -fsyntax-only -verify
+
+//--- a.cppm
+export module a;
+
+namespace n {
+}
+
+//--- ordering.mock.h
+namespace std {
+  class strong_ordering {
+  public:
+    int n;
+    static const strong_ordering less, equal, greater;
+    constexpr bool operator==(int n) const noexcept { return this->n == n;}
+    constexpr bool operator!=(int n) const noexcept { return this->n != n;}
+  };
+  constexpr strong_ordering strong_ordering::less = {-1};
+  constexpr strong_ordering strong_ordering::equal = {0};
+  constexpr strong_ordering strong_ordering::greater = {1};
+
+  class partial_ordering {
+  public:
+    long n;
+    static const partial_ordering less, equal, greater, equivalent, unordered;
+    constexpr bool operator==(long n) const noexcept { return this->n == n;}
+    constexpr bool operator!=(long n) const noexcept { return this->n != n;}
+  };
+  constexpr partial_ordering partial_ordering::less = {-1};
+  constexpr partial_ordering partial_ordering::equal = {0};
+  constexpr partial_ordering partial_ordering::greater = {1};
+  constexpr partial_ordering partial_ordering::equivalent = {0};
+  constexpr partial_ordering partial_ordering::unordered = {-127};
+} // namespace std
+
+//--- b.cppm
+module;
+#include "ordering.mock.h"
+export module b;
+
+import a;
+
+namespace n {
+
+struct monostate {
+	friend constexpr bool operator==(monostate, monostate) = default;
+};
+
+export struct wrapper {
+	friend constexpr bool operator==(wrapper const &LHS, wrapper const &RHS) {
+        return LHS.m_value == RHS.m_value;
+    }
+
+	monostate m_value;
+};
+
+struct monostate2 {
+	auto operator<=>(monostate2 const &) const & = default;
+};
+
+export struct wrapper2 {
+	friend bool operator==(wrapper2 const &LHS, wrapper2 const &RHS) = default;
+
+	monostate2 m_value;
+};
+
+} // namespace n
+
+//--- use.cc
+// expected-no-diagnostics
+import b;
+
+static_assert(n::wrapper() == n::wrapper());
+static_assert(n::wrapper2() == n::wrapper2());
diff --git a/clang/test/Modules/visibility-for-implicit-global-module.cppm b/clang/test/Modules/visibility-for-implicit-global-module.cppm
new file mode 100644
index 0000000000000..c55f2c3beee20
--- /dev/null
+++ b/clang/test/Modules/visibility-for-implicit-global-module.cppm
@@ -0,0 +1,18 @@
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -std=c++20 %t/a.interface.cppm -emit-module-interface -o %t/a.pcm
+// RUN: %clang_cc1 -std=c++20 %t/a.impl.cc -fmodule-file=a:interface=%t/a.pcm \
+// RUN:     -verify -fsyntax-only
+
+//--- a.interface.cppm
+export module a:interface;
+extern "C++" constexpr int a = 43;
+
+//--- a.impl.cc
+// expected-no-diagnostics
+module a:impl;
+import :interface;
+static_assert(a == 43);
+
diff --git a/clang/test/Modules/vtable-in-explicit-instantiation.cppm b/clang/test/Modules/vtable-in-explicit-instantiation.cppm
new file mode 100644
index 0000000000000..b090607751744
--- /dev/null
+++ b/clang/test/Modules/vtable-in-explicit-instantiation.cppm
@@ -0,0 +1,34 @@
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: split-file %s %t
+
+// RUN: %clang_cc1 -std=c++20 %t/a.cppm -triple %itanium_abi_triple -emit-module-interface -o %t/a.pcm
+// RUN: %clang_cc1 -std=c++20 %t/a.cc -triple %itanium_abi_triple -fmodule-file=a=%t/a.pcm -emit-llvm -o - | FileCheck %t/a.cc
+//
+// RUN: %clang_cc1 -std=c++20 %t/a.cppm -triple %itanium_abi_triple -emit-reduced-module-interface -o %t/a.pcm
+// RUN: %clang_cc1 -std=c++20 %t/a.cc -triple %itanium_abi_triple -fmodule-file=a=%t/a.pcm -emit-llvm -o - | FileCheck %t/a.cc
+
+//--- a.cppm
+export module a;
+class base {
+public:
+    ~base() = default;
+    virtual void foo();
+};
+
+template <class T>
+class a : public base {
+public:
+    virtual void foo() override;
+};
+
+extern template class a<int>;
+
+//--- a.cc
+module a;
+
+template <class T>
+void a<T>::foo() {}
+
+template class a<int>;
+// CHECK: _ZTVW1a1aIiE
diff --git a/clang/test/Sema/Inputs/lifetime-analysis.h b/clang/test/Sema/Inputs/lifetime-analysis.h
index f888e6ab94bb6..d318033ff0cc4 100644
--- a/clang/test/Sema/Inputs/lifetime-analysis.h
+++ b/clang/test/Sema/Inputs/lifetime-analysis.h
@@ -52,7 +52,7 @@ struct vector {
 
   void push_back(const T&);
   void push_back(T&&);
-  
+  const T& back() const;
   void insert(iterator, T&&);
 };
 
diff --git a/clang/test/Sema/atomic-ops.c b/clang/test/Sema/atomic-ops.c
index 2405f804d0da5..725a12060d4e0 100644
--- a/clang/test/Sema/atomic-ops.c
+++ b/clang/test/Sema/atomic-ops.c
@@ -284,11 +284,29 @@ void f(_Atomic(int) *i, const _Atomic(int) *ci,
 
   const volatile int flag_k = 0;
   volatile int flag = 0;
-  (void)(int)__atomic_test_and_set(&flag_k, memory_order_seq_cst); // expected-warning {{passing 'const volatile int *' to parameter of type 'volatile void *'}}
+  (void)(int)__atomic_test_and_set(&flag_k, memory_order_seq_cst); // expected-error {{address argument to atomic operation must be a pointer to non-const type ('const volatile int *' invalid)}}
   (void)(int)__atomic_test_and_set(&flag, memory_order_seq_cst);
-  __atomic_clear(&flag_k, memory_order_seq_cst); // expected-warning {{passing 'const volatile int *' to parameter of type 'volatile void *'}}
+  __atomic_clear(&flag_k, memory_order_seq_cst); // expected-error {{address argument to atomic operation must be a pointer to non-const type ('const volatile int *' invalid)}}
   __atomic_clear(&flag, memory_order_seq_cst);
   (int)__atomic_clear(&flag, memory_order_seq_cst); // expected-error {{operand of type 'void'}}
+  __atomic_clear(0x8000, memory_order_seq_cst); // expected-error {{address argument to atomic builtin must be a pointer ('int' invalid)}}
+  __atomic_clear(&flag, memory_order_consume); // expected-warning {{memory order argument to atomic operation is invalid}}
+  __atomic_clear(&flag, memory_order_acquire); // expected-warning {{memory order argument to atomic operation is invalid}}
+  __atomic_clear(&flag, memory_order_acq_rel); // expected-warning {{memory order argument to atomic operation is invalid}}
+  _Bool lock;
+  __atomic_test_and_set(lock, memory_order_acquire); // expected-error {{address argument to atomic builtin must be a pointer}}
+  __atomic_clear(lock, memory_order_release); // expected-error {{address argument to atomic builtin must be a pointer}}
+
+  // These intrinsics accept any non-const pointer type (including
+  // pointer-to-incomplete), and access the first byte.
+  __atomic_test_and_set((void*)0x8000, memory_order_seq_cst);
+  __atomic_test_and_set((char*)0x8000, memory_order_seq_cst);
+  __atomic_test_and_set((int*)0x8000, memory_order_seq_cst);
+  __atomic_test_and_set((struct incomplete*)0x8000, memory_order_seq_cst);
+  __atomic_clear((void*)0x8000, memory_order_seq_cst);
+  __atomic_clear((char*)0x8000, memory_order_seq_cst);
+  __atomic_clear((int*)0x8000, memory_order_seq_cst);
+  __atomic_clear((struct incomplete*)0x8000, memory_order_seq_cst);
 
   __c11_atomic_init(ci, 0); // expected-error {{address argument to atomic operation must be a pointer to non-const _Atomic type ('const _Atomic(int) *' invalid)}}
   __c11_atomic_store(ci, 0, memory_order_release); // expected-error {{address argument to atomic operation must be a pointer to non-const _Atomic type ('const _Atomic(int) *' invalid)}}
diff --git a/clang/test/Sema/pragma-clang-section.c b/clang/test/Sema/pragma-clang-section.c
index 458c91c2cf31c..e33e1dfe8cbef 100644
--- a/clang/test/Sema/pragma-clang-section.c
+++ b/clang/test/Sema/pragma-clang-section.c
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -fsyntax-only -verify %s -triple arm-none-eabi
+// RUN: %clang_cc1 -fsyntax-only -verify %s -triple arm64-windows-msvc
 #pragma clang section bss = "mybss.1" data = "mydata.1" rodata = "myrodata.1" text = "mytext.1" // expected-note 2 {{#pragma entered here}}
 #pragma clang section bss="" data="" rodata="" text=""
 #pragma clang section
diff --git a/clang/test/Sema/warn-lifetime-analysis-nocfg.cpp b/clang/test/Sema/warn-lifetime-analysis-nocfg.cpp
index 4c19367bb7f3d..04bb1330ded4c 100644
--- a/clang/test/Sema/warn-lifetime-analysis-nocfg.cpp
+++ b/clang/test/Sema/warn-lifetime-analysis-nocfg.cpp
@@ -806,3 +806,49 @@ std::string_view test2(int c, std::string_view sv) {
 }
 
 } // namespace GH120206
+
+namespace GH120543 {
+struct S {
+  std::string_view sv;
+  std::string s;
+};
+struct Q {
+  const S* get() const [[clang::lifetimebound]];
+};
+
+std::string_view foo(std::string_view sv [[clang::lifetimebound]]);
+
+void test1() {
+  std::string_view k1 = S().sv; // OK
+  std::string_view k2 = S().s; // expected-warning {{object backing the pointer will}}
+  
+  std::string_view k3 = Q().get()->sv; // OK
+  std::string_view k4  = Q().get()->s; // expected-warning {{object backing the pointer will}}
+
+  std::string_view lb1 = foo(S().s); // expected-warning {{object backing the pointer will}}
+  std::string_view lb2 = foo(Q().get()->s); // expected-warning {{object backing the pointer will}}
+}
+
+struct Bar {};
+struct Foo {
+  std::vector<Bar> v;
+};
+Foo getFoo();
+void test2() {
+  const Foo& foo = getFoo();
+  const Bar& bar = foo.v.back(); // OK
+}
+
+struct Foo2 {
+   std::unique_ptr<Bar> bar;
+};
+
+struct Test {
+  Test(Foo2 foo) : bar(foo.bar.get()), // OK
+      storage(std::move(foo.bar)) {};
+
+  Bar* bar;
+  std::unique_ptr<Bar> storage;
+};
+
+} // namespace GH120543
diff --git a/clang/test/SemaCXX/constant-expression-cxx11.cpp b/clang/test/SemaCXX/constant-expression-cxx11.cpp
index c990dc78deb9b..a1234b67acd6d 100644
--- a/clang/test/SemaCXX/constant-expression-cxx11.cpp
+++ b/clang/test/SemaCXX/constant-expression-cxx11.cpp
@@ -1471,9 +1471,9 @@ namespace ConvertedConstantExpr {
   // useless note and instead just point to the non-constant subexpression.
   enum class E {
     em = m,
-    en = n, // expected-error {{not a constant expression}} expected-note {{initializer of 'n' is unknown}}
+    en = n, // cxx23-note {{initializer of 'n' is not a constant expression}} expected-error {{enumerator value is not a constant expression}} cxx11_20-note {{initializer of 'n' is unknown}}
     eo = (m + // expected-error {{not a constant expression}}
-          n // expected-note {{initializer of 'n' is unknown}}
+          n // cxx23-note {{initializer of 'n' is not a constant expression}} cxx11_20-note {{initializer of 'n' is unknown}}
           ),
     eq = reinterpret_cast<long>((int*)0) // expected-error {{not a constant expression}} expected-note {{reinterpret_cast}}
   };
@@ -2007,7 +2007,8 @@ namespace ConstexprConstructorRecovery {
 
 namespace Lifetime {
   void f() {
-    constexpr int &n = n; // expected-error {{constant expression}} expected-note {{use of reference outside its lifetime}} expected-warning {{not yet bound to a value}}
+    constexpr int &n = n; // expected-error {{constant expression}} cxx23-note {{reference to 'n' is not a constant expression}} cxx23-note {{address of non-static constexpr variable 'n' may differ}} expected-warning {{not yet bound to a value}}
+                          // cxx11_20-note@-1 {{use of reference outside its lifetime is not allowed in a constant expression}}
     constexpr int m = m; // expected-error {{constant expression}} expected-note {{read of object outside its lifetime}}
   }
 
@@ -2427,15 +2428,15 @@ namespace array_size {
   template<typename T> void f1(T t) {
     constexpr int k = t.size();
   }
-  template<typename T> void f2(const T &t) { // expected-note 2{{declared here}}
-    constexpr int k = t.size(); // expected-error 2{{constant}} expected-note 2{{function parameter 't' with unknown value cannot be used in a constant expression}}
+  template<typename T> void f2(const T &t) { // cxx11_20-note 2{{declared here}}
+    constexpr int k = t.size();  // cxx11_20-error 2{{constexpr variable 'k' must be initialized by a constant expression}} cxx11_20-note 2{{function parameter 't' with unknown value cannot be used in a constant expression}}
   }
   template<typename T> void f3(const T &t) {
     constexpr int k = T::size();
   }
   void g(array<3> a) {
     f1(a);
-    f2(a); // expected-note {{instantiation of}}
+    f2(a); // cxx11_20-note {{in instantiation of function template}}
     f3(a);
   }
 
@@ -2444,8 +2445,9 @@ namespace array_size {
   };
   void h(array_nonstatic<3> a) {
     f1(a);
-    f2(a); // expected-note {{instantiation of}}
+    f2(a); // cxx11_20-note {{instantiation of}}
   }
+  //static_assert(f2(array_size::array<3>{}));
 }
 
 namespace flexible_array {
diff --git a/clang/test/SemaCXX/constant-expression-cxx2a.cpp b/clang/test/SemaCXX/constant-expression-cxx2a.cpp
index 36d4d25c48471..85720606fe9de 100644
--- a/clang/test/SemaCXX/constant-expression-cxx2a.cpp
+++ b/clang/test/SemaCXX/constant-expression-cxx2a.cpp
@@ -308,8 +308,7 @@ namespace TypeId {
   static_assert(&B2().ti1 == &typeid(B));
   static_assert(&B2().ti2 == &typeid(B2));
   extern B2 extern_b2;
-  // expected-note@+1 {{typeid applied to object 'extern_b2' whose dynamic type is not constant}}
-  static_assert(&typeid(extern_b2) == &typeid(B2)); // expected-error {{constant expression}}
+  static_assert(&typeid(extern_b2) == &typeid(B2));
 
   constexpr B2 b2;
   constexpr const B &b1 = b2;
diff --git a/clang/test/SemaCXX/constant-expression-p2280r4.cpp b/clang/test/SemaCXX/constant-expression-p2280r4.cpp
new file mode 100644
index 0000000000000..0f85c60629eed
--- /dev/null
+++ b/clang/test/SemaCXX/constant-expression-p2280r4.cpp
@@ -0,0 +1,156 @@
+// RUN: %clang_cc1 -std=c++23 -verify %s
+
+using size_t = decltype(sizeof(0));
+
+namespace std {
+struct type_info {
+  const char* name() const noexcept(true);
+};
+}
+
+template <typename T, size_t N>
+constexpr size_t array_size(T (&)[N]) {
+  return N;
+}
+
+void use_array(int const (&gold_medal_mel)[2]) {
+  constexpr auto gold = array_size(gold_medal_mel); // ok
+}
+
+constexpr auto olympic_mile() {
+  const int ledecky = 1500;
+  return []{ return ledecky; };
+}
+static_assert(olympic_mile()() == 1500); // ok
+
+struct Swim {
+  constexpr int phelps() { return 28; }
+  virtual constexpr int lochte() { return 12; }
+  int coughlin = 12;
+};
+
+constexpr int how_many(Swim& swam) {
+  Swim* p = &swam;
+  return (p + 1 - 1)->phelps();
+}
+
+void splash(Swim& swam) {
+  static_assert(swam.phelps() == 28);     // ok
+  static_assert((&swam)->phelps() == 28); // ok
+  Swim* pswam = &swam;                    // expected-note {{declared here}}
+  static_assert(pswam->phelps() == 28);   // expected-error {{static assertion expression is not an integral constant expression}}
+                                          // expected-note@-1 {{read of non-constexpr variable 'pswam' is not allowed in a constant expression}}
+  static_assert(how_many(swam) == 28);    // ok
+  static_assert(Swim().lochte() == 12);   // ok
+  static_assert(swam.lochte() == 12);     // expected-error {{static assertion expression is not an integral constant expression}}
+  static_assert(swam.coughlin == 12);     // expected-error {{static assertion expression is not an integral constant expression}}
+}
+
+extern Swim dc;
+extern Swim& trident; // expected-note {{declared here}}
+
+constexpr auto& sandeno   = typeid(dc);         // ok: can only be typeid(Swim)
+constexpr auto& gallagher = typeid(trident);    // expected-error {{constexpr variable 'gallagher' must be initialized by a constant expression}}
+                                                // expected-note@-1 {{initializer of 'trident' is not a constant expression}}
+
+namespace explicitThis {
+struct C {
+  constexpr int b()  { return 0; };
+
+  constexpr int f(this C &c) {
+    return c.b();     // ok
+  }
+
+   constexpr int g() {
+    return f();       // ok
+  }
+};
+
+void g() {
+  C c;
+  constexpr int x = c.f();
+  constexpr int y = c.g();
+}
+}
+
+namespace GH64376 {
+template<int V>
+struct Test {
+    static constexpr int value = V;
+};
+
+int main() {
+    Test<124> test;
+    auto& test2 = test;
+
+    if constexpr(test2.value > 3) {
+       return 1;
+    }
+
+    return 0;
+}
+}
+
+namespace GH30060 {
+template<int V>
+struct A {
+  static constexpr int value = V;
+};
+
+template<class T>
+static void test1(T &f) {
+    A<f.value> bar;
+}
+
+void g() {
+    A<42> f;
+
+    test1(f);
+}
+}
+
+namespace GH26067 {
+struct A {
+    constexpr operator int() const { return 42; }
+};
+
+template <int>
+void f() {}
+
+void test(const A& value) {
+    f<value>();
+}
+
+int main() {
+    A a{};
+    test(a);
+}
+}
+
+namespace GH34365 {
+void g() {
+  auto f = []() { return 42; };
+  constexpr int x = f();
+  [](auto f) { constexpr int x = f(); }(f);
+  [](auto &f) { constexpr int x = f(); }(f);
+  (void)[&]() { constexpr int x = f(); };
+}
+}
+
+namespace GH118063 {
+template <unsigned int N>
+struct array {
+    constexpr auto size() const -> unsigned int {
+        return N;
+    }
+};
+
+constexpr auto f(array<5> const& arr) {
+    return array<arr.size()>{}.size();
+}
+
+int g() {
+    array<5> arr {};
+    static_assert(f(arr) == 5);
+}
+}
diff --git a/clang/test/SemaCXX/warn-base-type-qualifiers.cpp b/clang/test/SemaCXX/warn-base-type-qualifiers.cpp
new file mode 100644
index 0000000000000..b9fd577c574b9
--- /dev/null
+++ b/clang/test/SemaCXX/warn-base-type-qualifiers.cpp
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 %s -std=c++11 -Wignored-qualifiers -verify
+
+template <typename T> struct add_const {
+  using type = const T;
+};
+template <typename T> using add_const_t = typename add_const<T>::type;
+
+class A { };
+
+typedef const A A_Const;
+class B : public A_Const { }; // expected-warning {{'const' qualifier on base class type 'A_Const' (aka 'const A') have no effect}} \
+                              // expected-note {{base class 'A_Const' (aka 'const A') specified here}}
+
+typedef const volatile A A_Const_Volatile;
+class C : public A_Const_Volatile { }; // expected-warning {{'const volatile' qualifiers on base class type 'A_Const_Volatile' (aka 'const volatile A') have no effect}} \
+                                       // expected-note {{base class 'A_Const_Volatile' (aka 'const volatile A') specified here}}
+
+struct D {
+  D(int);
+};
+
+template <typename T> struct E : T { // expected-warning {{'const' qualifier on base class type 'const D' have no effect}} \
+                                     // expected-note {{base class 'const D' specified here}}
+  using T::T;
+  E(int &) : E(0) {}
+};
+E<const D> e(1); // expected-note {{in instantiation of template class 'E<const D>' requested here}}
+
+template <typename T>
+struct G : add_const<T>::type { // expected-warning {{'const' qualifier on base class type 'add_const<D>::type' (aka 'const D') have no effect}} \
+                                // expected-note {{base class 'add_const<D>::type' (aka 'const D') specified here}}
+  using T::T;
+  G(int &) : G(0) {}
+};
+G<D> g(1); // expected-note {{in instantiation of template class 'G<D>' requested here}}
diff --git a/clang/test/SemaOpenCL/invalid-block.cl b/clang/test/SemaOpenCL/invalid-block.cl
index 6c918d302f801..1605369000429 100644
--- a/clang/test/SemaOpenCL/invalid-block.cl
+++ b/clang/test/SemaOpenCL/invalid-block.cl
@@ -65,6 +65,8 @@ void f5(int i) {
   bl2_t arr[] = {bl1, bl2}; // expected-error {{array of 'bl2_t' (aka 'int (__generic ^const)(__private int)') type is invalid in OpenCL}}
   int tmp = i ? bl1(i)      // expected-error {{block type cannot be used as expression in ternary expression in OpenCL}}
               : bl2(i);     // expected-error {{block type cannot be used as expression in ternary expression in OpenCL}}
+  bl2_t bref = i ? bl1      // expected-error {{block type cannot be used as expression in ternary expression in OpenCL}}
+                 : bl2;     // expected-error {{block type cannot be used as expression in ternary expression in OpenCL}}
 }
 // A block pointer type and all pointer operations are disallowed
 void f6(bl2_t *bl_ptr) { // expected-error{{pointer to type 'bl2_t' (aka 'int (__generic ^const)(__private int)') is invalid in OpenCL}}
diff --git a/clang/test/SemaTemplate/concepts-out-of-line-def.cpp b/clang/test/SemaTemplate/concepts-out-of-line-def.cpp
index 7cb5cfc10b9a7..6c1a229a9fdda 100644
--- a/clang/test/SemaTemplate/concepts-out-of-line-def.cpp
+++ b/clang/test/SemaTemplate/concepts-out-of-line-def.cpp
@@ -737,3 +737,17 @@ ptr<U> make_item(auto &&args)
 ptr<char> p;
 
 } // namespace GH114685
+
+namespace GH123472 {
+
+consteval bool fn() { return true; }
+
+struct S {
+  template <typename T>
+  static consteval void mfn() requires (bool(&fn));
+};
+
+template <typename T>
+consteval void S::mfn() requires (bool(&fn)) {}
+
+}
diff --git a/clang/tools/diagtool/DiagnosticNames.cpp b/clang/tools/diagtool/DiagnosticNames.cpp
index c3a3002889c73..4ac9825848ef3 100644
--- a/clang/tools/diagtool/DiagnosticNames.cpp
+++ b/clang/tools/diagtool/DiagnosticNames.cpp
@@ -9,6 +9,7 @@
 #include "DiagnosticNames.h"
 #include "clang/Basic/AllDiagnostics.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringTable.h"
 
 using namespace clang;
 using namespace diagtool;
@@ -68,7 +69,7 @@ static const GroupRecord OptionTable[] = {
 };
 
 llvm::StringRef GroupRecord::getName() const {
-  return StringRef(DiagGroupNames + NameOffset + 1, DiagGroupNames[NameOffset]);
+  return DiagGroupNames[NameOffset];
 }
 
 GroupRecord::subgroup_iterator GroupRecord::subgroup_begin() const {
diff --git a/clang/unittests/Analysis/CFGBuildResult.h b/clang/unittests/Analysis/CFGBuildResult.h
index 72ad1cc7ce401..0d5539005840d 100644
--- a/clang/unittests/Analysis/CFGBuildResult.h
+++ b/clang/unittests/Analysis/CFGBuildResult.h
@@ -65,8 +65,8 @@ class CFGCallback : public ast_matchers::MatchFinder::MatchCallback {
 template <typename FuncMatcherT = ast_matchers::internal::TrueMatcher>
 BuildResult BuildCFG(const char *Code, CFG::BuildOptions Options = {},
                      FuncMatcherT FuncMatcher = ast_matchers::anything()) {
-  std::vector<std::string> Args = {"-std=c++11",
-                                   "-fno-delayed-template-parsing"};
+  const std::vector<std::string> Args = {
+      "-std=c++11", "-fno-delayed-template-parsing", "-Wno-everything"};
   std::unique_ptr<ASTUnit> AST = tooling::buildASTFromCodeWithArgs(Code, Args);
   if (!AST)
     return BuildResult::ToolFailed;
diff --git a/clang/unittests/Analysis/CFGTest.cpp b/clang/unittests/Analysis/CFGTest.cpp
index 2b27da0081425..46a6751391cf5 100644
--- a/clang/unittests/Analysis/CFGTest.cpp
+++ b/clang/unittests/Analysis/CFGTest.cpp
@@ -195,7 +195,6 @@ TEST(CFG, ElementRefIterator) {
   // Reverse, non-const version
   Index = MainBlockSize;
   for (CFGBlock::CFGElementRef ElementRef : MainBlock->rrefs()) {
-    llvm::errs() << Index << '\n';
     EXPECT_EQ(ElementRef.getParent(), MainBlock);
     EXPECT_EQ(ElementRef.getIndexInBlock(), Index);
     EXPECT_TRUE(ElementRef->getAs<CFGStmt>());
diff --git a/clang/unittests/Analysis/ExprMutationAnalyzerTest.cpp b/clang/unittests/Analysis/ExprMutationAnalyzerTest.cpp
index 137baab53301a..cc277d56b37a2 100644
--- a/clang/unittests/Analysis/ExprMutationAnalyzerTest.cpp
+++ b/clang/unittests/Analysis/ExprMutationAnalyzerTest.cpp
@@ -14,6 +14,7 @@
 #include "clang/Tooling/Tooling.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
+#include <cassert>
 #include <cctype>
 
 namespace clang {
@@ -1609,4 +1610,398 @@ TEST(ExprMutationAnalyzerTest, ReproduceFailureMinimal) {
       match(withEnclosingCompound(declRefTo("x")), AST11->getASTContext());
   EXPECT_FALSE(isMutated(Results11, AST11.get()));
 }
+
+static bool isPointeeMutated(const SmallVectorImpl<BoundNodes> &Results,
+                             ASTUnit *AST) {
+  const auto *const S = selectFirst<Stmt>("stmt", Results);
+  const auto *const E = selectFirst<Expr>("expr", Results);
+  assert(S && E); // "stmt" and "expr" must both have been bound
+  TraversalKindScope RAII(AST->getASTContext(), TK_AsIs);
+  return ExprMutationAnalyzer(*S, AST->getASTContext()).isPointeeMutated(E);
+}
+
+static bool isDeclPointeeMutated(const SmallVectorImpl<BoundNodes> &Results,
+                                 ASTUnit *AST) {
+  const auto *const S = selectFirst<Stmt>("stmt", Results);
+  const auto *const D = selectFirst<Decl>("decl", Results);
+  assert(S && D); // "stmt" and "decl" must both have been bound
+  TraversalKindScope RAII(AST->getASTContext(), TK_AsIs);
+  return ExprMutationAnalyzer(*S, AST->getASTContext()).isPointeeMutated(D);
+}
+
+TEST(ExprMutationAnalyzerTest, PointeeMutatedByAssign) {
+  {
+    const std::string Code = "void f() { int* x = nullptr; int b = *x; }";
+    auto AST = buildASTFromCodeWithArgs(Code, {});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_FALSE(isPointeeMutated(Results, AST.get()));
+  }
+  {
+    const std::string Code = "void f() { int* x = nullptr; *x = 100; }";
+    auto AST = buildASTFromCodeWithArgs(Code, {});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_TRUE(isPointeeMutated(Results, AST.get()));
+  }
+  {
+    const std::string Code = "void f() { int* x = nullptr; (*x)++; }";
+    auto AST = buildASTFromCodeWithArgs(Code, {"-Wno-unused-value"});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_TRUE(isPointeeMutated(Results, AST.get()));
+  }
+}
+
+TEST(ExprMutationAnalyzerTest, PointeeMutatedByMember) {
+  {
+    const std::string Code =
+        "struct A { int v; }; void f() { A* x = nullptr; int b = x->v; }";
+    auto AST = buildASTFromCodeWithArgs(Code, {});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_FALSE(isPointeeMutated(Results, AST.get()));
+  }
+  {
+    const std::string Code =
+        "struct A { int v; }; void f() { A* x = nullptr; x->v = 1; }";
+    auto AST = buildASTFromCodeWithArgs(Code, {});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_TRUE(isPointeeMutated(Results, AST.get()));
+  }
+  {
+    const std::string Code =
+        "struct A { int v; }; void f() { A* x = nullptr; x->v++; }";
+    auto AST = buildASTFromCodeWithArgs(Code, {"-Wno-unused-value"});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_TRUE(isPointeeMutated(Results, AST.get()));
+  }
+}
+TEST(ExprMutationAnalyzerTest, PointeeMutatedByMethod) {
+  {
+    const std::string Code = "struct A { int v; void foo(); };"
+                             "void f() { A* x = nullptr; x->foo(); }";
+    auto AST = buildASTFromCodeWithArgs(Code, {});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_TRUE(isPointeeMutated(Results, AST.get()));
+  }
+  {
+    const std::string Code = "struct A { int v; void foo() const; };"
+                             "void f() { A* x = nullptr; x->foo(); }";
+    auto AST = buildASTFromCodeWithArgs(Code, {});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_FALSE(isPointeeMutated(Results, AST.get()));
+  }
+}
+TEST(ExprMutationAnalyzerTest, PointeeMutatedByOperatorOverload) {
+  {
+    const std::string Code = "struct A { int v; int operator++(); };"
+                             "void f() { A* x = nullptr; x->operator++(); }";
+    auto AST = buildASTFromCodeWithArgs(Code, {});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_TRUE(isPointeeMutated(Results, AST.get()));
+  }
+  {
+    const std::string Code = "struct A { int v; int operator++() const; };"
+                             "void f() { A* x = nullptr; x->operator++(); }";
+    auto AST = buildASTFromCodeWithArgs(Code, {});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_FALSE(isPointeeMutated(Results, AST.get()));
+  }
+}
+
+TEST(ExprMutationAnalyzerTest, PointeeMutatedByInitToNonConst) {
+  {
+    const std::string Code = "void f() { int* x = nullptr; int const* b = x; }";
+    auto AST = buildASTFromCodeWithArgs(Code, {});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_FALSE(isPointeeMutated(Results, AST.get()));
+  }
+  {
+    const std::string Code = "void f() { int* x = nullptr; int* b = x; }";
+    auto AST = buildASTFromCodeWithArgs(Code, {});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_TRUE(isPointeeMutated(Results, AST.get()));
+  }
+  {
+    const std::string Code = "void f() { int* x = nullptr; int* const b = x; }";
+    auto AST = buildASTFromCodeWithArgs(Code, {});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_TRUE(isPointeeMutated(Results, AST.get()));
+  }
+}
+
+TEST(ExprMutationAnalyzerTest, PointeeMutatedByAssignToNonConst) {
+  {
+    const std::string Code =
+        "void f() { int* x = nullptr; int const* b; b = x; }";
+    auto AST = buildASTFromCodeWithArgs(Code, {});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_FALSE(isPointeeMutated(Results, AST.get()));
+  }
+  {
+    const std::string Code = "void f() { int* x = nullptr; int* b; b = x; }";
+    auto AST = buildASTFromCodeWithArgs(Code, {});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_TRUE(isPointeeMutated(Results, AST.get()));
+  }
+}
+
+TEST(ExprMutationAnalyzerTest, PointeeMutatedByPassAsArgument) {
+  {
+    const std::string Code =
+        "void b(int const*); void f() { int* x = nullptr; b(x); }";
+    auto AST = buildASTFromCodeWithArgs(Code, {});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_FALSE(isPointeeMutated(Results, AST.get()));
+  }
+  {
+    const std::string Code =
+        "void b(int *); void f() { int* x = nullptr; b(x); }";
+    auto AST = buildASTFromCodeWithArgs(Code, {});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_TRUE(isPointeeMutated(Results, AST.get()));
+  }
+}
+
+TEST(ExprMutationAnalyzerTest, PointeeMutatedByPassAsArgumentInConstruct) {
+  {
+    const std::string Code = "struct A { A(int const*); };"
+                             "void f() { int *x; A a{x}; }";
+    auto AST = buildASTFromCodeWithArgs(Code, {});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_FALSE(isPointeeMutated(Results, AST.get()));
+  }
+  {
+    const std::string Code = "struct A { A(int const*); };"
+                             "void f() { int *x; A a(x); }";
+    auto AST = buildASTFromCodeWithArgs(Code, {});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_FALSE(isPointeeMutated(Results, AST.get()));
+  }
+  {
+    const std::string Code = "struct A { A(int const*); };"
+                             "void f() { int *x; A a = x; }";
+    auto AST = buildASTFromCodeWithArgs(Code, {});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_FALSE(isPointeeMutated(Results, AST.get()));
+  }
+  {
+    const std::string Code = "struct A { A(int *); };"
+                             "void f() { int *x; A a{x}; }";
+    auto AST = buildASTFromCodeWithArgs(Code, {});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_TRUE(isPointeeMutated(Results, AST.get()));
+  }
+}
+
+TEST(ExprMutationAnalyzerTest,
+     PointeeMutatedByPassAsArgumentInTemplateConstruct) {
+  const std::string Code = "template<class T> void f() { int *x; new T(x); }";
+  auto AST = buildASTFromCodeWithArgs(Code, {"-fno-delayed-template-parsing"});
+  auto Results =
+      match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+  EXPECT_TRUE(isPointeeMutated(Results, AST.get()));
+}
+
+TEST(ExprMutationAnalyzerTest, PointeeMutatedByPassAsArgumentInInitList) {
+  {
+    const std::string Code =
+        "namespace std {"
+        "template<class T>"
+        "struct initializer_list{ T const* begin; T const* end; };"
+        "}"
+        "void f() { int *x; std::initializer_list<int*> a{x, x, x}; }";
+    auto AST =
+        buildASTFromCodeWithArgs(Code, {"-fno-delayed-template-parsing"});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_TRUE(isPointeeMutated(Results, AST.get()));
+  }
+}
+
+TEST(ExprMutationAnalyzerTest, PointeeMutatedByThis) {
+  {
+    const std::string Code =
+        "struct A { void m() const; }; void f() { A* x = nullptr; x->m(); }";
+    auto AST = buildASTFromCodeWithArgs(Code, {});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_FALSE(isPointeeMutated(Results, AST.get()));
+  }
+  {
+    const std::string Code =
+        "struct A { void m(); }; void f() { A* x = nullptr; x->m(); }";
+    auto AST = buildASTFromCodeWithArgs(Code, {});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_TRUE(isPointeeMutated(Results, AST.get()));
+  }
+}
+
+TEST(ExprMutationAnalyzerTest, PointeeMutatedByExplicitCastToNonConst) {
+  {
+    const std::string Code =
+        "void f() { int* x = nullptr; static_cast<int const*>(x); }";
+    auto AST = buildASTFromCodeWithArgs(Code, {"-Wno-everything"});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_FALSE(isPointeeMutated(Results, AST.get()));
+  }
+  {
+    const std::string Code =
+        "void f() { int* x = nullptr; static_cast<int*>(x); }";
+    auto AST = buildASTFromCodeWithArgs(Code, {"-Wno-everything"});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_TRUE(isPointeeMutated(Results, AST.get()));
+  }
+}
+
+TEST(ExprMutationAnalyzerTest, PointeeMutatedByConstCastToNonConst) {
+  // Even a const_cast to a pointer-to-non-const is treated as a mutation.
+  {
+    const std::string Code =
+        "void f() { int* x = nullptr; const_cast<int const*>(x); }";
+    auto AST = buildASTFromCodeWithArgs(Code, {"-Wno-everything"});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_FALSE(isPointeeMutated(Results, AST.get()));
+  }
+  {
+    const std::string Code =
+        "void f() { int* x = nullptr; const_cast<int*>(x); }";
+    auto AST = buildASTFromCodeWithArgs(Code, {"-Wno-everything"});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_TRUE(isPointeeMutated(Results, AST.get()));
+  }
+}
+
+TEST(ExprMutationAnalyzerTest, PointeeMutatedByUnresolvedCall) {
+  const std::string Code =
+      "template <class T> struct S;"
+      "template <class T> void f() { S<T> s; int* x = nullptr; s.m(x); }";
+  auto AST = buildASTFromCodeWithArgs(
+      Code, {"-fno-delayed-template-parsing", "-Wno-everything"});
+  auto Results =
+      match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+  EXPECT_TRUE(isPointeeMutated(Results, AST.get()));
+}
+
+TEST(ExprMutationAnalyzerTest, PointeeMutatedByAssignToUnknownType) {
+  {
+    const std::string Code = "template <class T> void f() {"
+                             "  int* x = nullptr;"
+                             "  T t = x;"
+                             "}";
+    auto AST = buildASTFromCodeWithArgs(
+        Code, {"-fno-delayed-template-parsing", "-Wno-everything"});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_TRUE(isPointeeMutated(Results, AST.get()));
+  }
+  {
+    const std::string Code = "template <class T> void f() {"
+                             "  int* x = nullptr;"
+                             "  typename T::t t = x;"
+                             "}";
+    auto AST = buildASTFromCodeWithArgs(
+        Code, {"-fno-delayed-template-parsing", "-Wno-everything"});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_TRUE(isPointeeMutated(Results, AST.get()));
+  }
+}
+
+TEST(ExprMutationAnalyzerTest, PointeeMutatedByLambdaCapture) {
+  const std::string Code = R"(
+      void f() {
+        int* x;
+        [x] () { *x = 1; };
+      })";
+  auto AST = buildASTFromCodeWithArgs(Code, {"-Wno-everything"});
+  auto Results =
+      match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+  EXPECT_TRUE(isDeclPointeeMutated(Results, AST.get()));
+}
+
+TEST(ExprMutationAnalyzerTest, PointeeMutatedByLambdaCaptureInit) {
+  const std::string Code = R"(
+      void f() {
+        int* x;
+        [t = x] () { *t = 1; };
+      })";
+  auto AST = buildASTFromCodeWithArgs(Code, {"-Wno-everything"});
+  auto Results =
+      match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+  EXPECT_TRUE(isDeclPointeeMutated(Results, AST.get()));
+}
+
+TEST(ExprMutationAnalyzerTest, PointeeMutatedByPointerArithmeticAdd) {
+  {
+    const std::string Code = R"(
+      void f() {
+        int* x;
+        int* y = x + 1;
+      })";
+    auto AST = buildASTFromCodeWithArgs(Code, {"-Wno-everything"});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_TRUE(isPointeeMutated(Results, AST.get()));
+  }
+  {
+    const std::string Code = R"(
+      void f() {
+        int* x;
+        x + 1;
+      })";
+    auto AST = buildASTFromCodeWithArgs(Code, {"-Wno-everything"});
+    auto Results =
+        match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+    EXPECT_FALSE(isPointeeMutated(Results, AST.get()));
+  }
+}
+
+TEST(ExprMutationAnalyzerTest, PointeeMutatedByPointerArithmeticSubElement) {
+  const std::string Code = R"(
+      void f() {
+        int* x;
+        int* y = &x[1];
+      })";
+  auto AST = buildASTFromCodeWithArgs(Code, {"-Wno-everything"});
+  auto Results =
+      match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+  EXPECT_TRUE(isPointeeMutated(Results, AST.get()));
+}
+
+TEST(ExprMutationAnalyzerTest, PointeeMutatedByConditionOperator) {
+  const std::string Code = R"(
+      void f() {
+        int* x;
+        int* y = 1 ? nullptr : x;
+      })";
+  auto AST = buildASTFromCodeWithArgs(Code, {"-Wno-everything"});
+  auto Results =
+      match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+  EXPECT_TRUE(isPointeeMutated(Results, AST.get()));
+}
+
 } // namespace clang
diff --git a/clang/unittests/Format/ConfigParseTest.cpp b/clang/unittests/Format/ConfigParseTest.cpp
index 1f0beafaad7f7..9746aa3547846 100644
--- a/clang/unittests/Format/ConfigParseTest.cpp
+++ b/clang/unittests/Format/ConfigParseTest.cpp
@@ -176,6 +176,7 @@ TEST(ConfigParseTest, ParsesConfigurationBools) {
   CHECK_PARSE_BOOL(IndentAccessModifiers);
   CHECK_PARSE_BOOL(IndentCaseBlocks);
   CHECK_PARSE_BOOL(IndentCaseLabels);
+  CHECK_PARSE_BOOL(IndentExportBlock);
   CHECK_PARSE_BOOL(IndentGotoLabels);
   CHECK_PARSE_BOOL(IndentRequiresClause);
   CHECK_PARSE_BOOL_FIELD(IndentRequiresClause, "IndentRequires");
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 9623187073a0b..61aa140dfdc9c 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -9059,9 +9059,9 @@ TEST_F(FormatTest, AdaptiveOnePerLineFormatting) {
                Style);
 }
 
-TEST_F(FormatTest, ExportBlockIndentation) {
+TEST_F(FormatTest, IndentExportBlock) {
   FormatStyle Style = getLLVMStyleWithColumns(80);
-  Style.ExportBlockIndentation = true;
+  Style.IndentExportBlock = true;
   verifyFormat("export {\n"
                "  int x;\n"
                "  int y;\n"
@@ -9072,7 +9072,7 @@ TEST_F(FormatTest, ExportBlockIndentation) {
                "}",
                Style);
 
-  Style.ExportBlockIndentation = false;
+  Style.IndentExportBlock = false;
   verifyFormat("export {\n"
                "int x;\n"
                "int y;\n"
@@ -9086,7 +9086,7 @@ TEST_F(FormatTest, ExportBlockIndentation) {
 
 TEST_F(FormatTest, ShortExportBlocks) {
   FormatStyle Style = getLLVMStyleWithColumns(80);
-  Style.ExportBlockIndentation = false;
+  Style.IndentExportBlock = false;
 
   Style.AllowShortBlocksOnASingleLine = FormatStyle::SBS_Never;
   verifyFormat("export {\n"
diff --git a/clang/unittests/Format/QualifierFixerTest.cpp b/clang/unittests/Format/QualifierFixerTest.cpp
index 9ed567445eb07..129828b0d187a 100644
--- a/clang/unittests/Format/QualifierFixerTest.cpp
+++ b/clang/unittests/Format/QualifierFixerTest.cpp
@@ -1279,6 +1279,18 @@ TEST_F(QualifierFixerTest, WithConstraints) {
                Style);
 }
 
+TEST_F(QualifierFixerTest, WithCpp11Attribute) {
+  FormatStyle Style = getLLVMStyle();
+  Style.QualifierAlignment = FormatStyle::QAS_Custom;
+  Style.QualifierOrder = {"static", "constexpr", "inline", "type"};
+
+  verifyFormat("[[nodiscard]] static constexpr inline int func() noexcept {}",
+               "[[nodiscard]] inline constexpr static int func() noexcept {}",
+               Style);
+  verifyFormat("[[maybe_unused]] static constexpr int A",
+               "[[maybe_unused]] constexpr static int A", Style);
+}
+
 TEST_F(QualifierFixerTest, DisableRegions) {
   FormatStyle Style = getLLVMStyle();
   Style.QualifierAlignment = FormatStyle::QAS_Custom;
diff --git a/clang/unittests/Lex/LexerTest.cpp b/clang/unittests/Lex/LexerTest.cpp
index c897998cabe66..29c61fee6f531 100644
--- a/clang/unittests/Lex/LexerTest.cpp
+++ b/clang/unittests/Lex/LexerTest.cpp
@@ -640,6 +640,41 @@ TEST_F(LexerTest, FindNextTokenIncludingComments) {
                           "=", "abcd", ";"));
 }
 
+TEST_F(LexerTest, FindPreviousToken) {
+  Lex("int abcd = 0;\n"
+      "// A comment.\n"
+      "int xyz = abcd;\n");
+  std::vector<std::string> GeneratedByPrevToken;
+  SourceLocation Loc = SourceMgr.getLocForEndOfFile(SourceMgr.getMainFileID());
+  while (true) {
+    auto T = Lexer::findPreviousToken(Loc, SourceMgr, LangOpts, false);
+    if (!T.has_value())
+      break;
+    GeneratedByPrevToken.push_back(getSourceText(*T, *T));
+    Loc = Lexer::GetBeginningOfToken(T->getLocation(), SourceMgr, LangOpts);
+  }
+  EXPECT_THAT(GeneratedByPrevToken, ElementsAre(";", "abcd", "=", "xyz", "int",
+                                                ";", "0", "=", "abcd", "int"));
+}
+
+TEST_F(LexerTest, FindPreviousTokenIncludingComments) {
+  Lex("int abcd = 0;\n"
+      "// A comment.\n"
+      "int xyz = abcd;\n");
+  std::vector<std::string> GeneratedByPrevToken;
+  SourceLocation Loc = SourceMgr.getLocForEndOfFile(SourceMgr.getMainFileID());
+  while (true) {
+    auto T = Lexer::findPreviousToken(Loc, SourceMgr, LangOpts, true);
+    if (!T.has_value())
+      break;
+    GeneratedByPrevToken.push_back(getSourceText(*T, *T));
+    Loc = Lexer::GetBeginningOfToken(T->getLocation(), SourceMgr, LangOpts);
+  }
+  EXPECT_THAT(GeneratedByPrevToken,
+              ElementsAre(";", "abcd", "=", "xyz", "int", "// A comment.", ";",
+                          "0", "=", "abcd", "int"));
+}
+
 TEST_F(LexerTest, CreatedFIDCountForPredefinedBuffer) {
   TrivialModuleLoader ModLoader;
   auto PP = CreatePP("", ModLoader);
diff --git a/clang/utils/TableGen/ClangDiagnosticsEmitter.cpp b/clang/utils/TableGen/ClangDiagnosticsEmitter.cpp
index fb00c640d6b14..5f03efdb80434 100644
--- a/clang/utils/TableGen/ClangDiagnosticsEmitter.cpp
+++ b/clang/utils/TableGen/ClangDiagnosticsEmitter.cpp
@@ -1782,19 +1782,12 @@ static void emitDiagArrays(DiagsInGroupTy &DiagsInGroup,
 
 /// Emit a list of group names.
 ///
-/// This creates a long string which by itself contains a list of pascal style
-/// strings, which consist of a length byte directly followed by the string.
-///
-/// \code
-///   static const char DiagGroupNames[] = {
-///     \000\020#pragma-messages\t#warnings\020CFString-literal"
-///   };
-/// \endcode
+/// This creates an `llvm::StringTable` of all the diagnostic group names.
 static void emitDiagGroupNames(const StringToOffsetTable &GroupNames,
                                raw_ostream &OS) {
-  OS << "static const char DiagGroupNames[] = {\n";
-  GroupNames.EmitString(OS);
-  OS << "};\n\n";
+  GroupNames.EmitStringLiteralDef(
+      OS, "static constexpr llvm::StringTable DiagGroupNames");
+  OS << "\n";
 }
 
 /// Emit diagnostic arrays and related data structures.
@@ -1806,7 +1799,7 @@ static void emitDiagGroupNames(const StringToOffsetTable &GroupNames,
 ///  #ifdef GET_DIAG_ARRAYS
 ///     static const int16_t DiagArrays[];
 ///     static const int16_t DiagSubGroups[];
-///     static const char DiagGroupNames[];
+///     static constexpr llvm::StringTable DiagGroupNames;
 ///  #endif
 ///  \endcode
 static void emitAllDiagArrays(DiagsInGroupTy &DiagsInGroup,
@@ -1858,9 +1851,7 @@ static void emitDiagTable(DiagsInGroupTy &DiagsInGroup,
                                "0123456789!@#$%^*-+=:?") != std::string::npos)
       PrintFatalError("Invalid character in diagnostic group '" + Name + "'");
     OS << Name << " */, ";
-    // Store a pascal-style length byte at the beginning of the string.
-    std::string PascalName = char(Name.size()) + Name.str();
-    OS << *GroupNames.GetStringOffset(PascalName) << ", ";
+    OS << *GroupNames.GetStringOffset(Name) << ", ";
 
     // Special handling for 'pedantic'.
     const bool IsPedantic = Name == "pedantic";
@@ -1948,10 +1939,11 @@ void clang::EmitClangDiagGroups(const RecordKeeper &Records, raw_ostream &OS) {
   inferPedantic.compute(&DiagsInPedantic, &GroupsInPedantic);
 
   StringToOffsetTable GroupNames;
+  // Add an empty string to the table first so we can use `llvm::StringTable`.
+  // TODO: Factor this into `StringToOffsetTable`.
+  GroupNames.GetOrAddStringOffset("");
   for (const auto &[Name, Group] : DiagsInGroup) {
-    // Store a pascal-style length byte at the beginning of the string.
-    std::string PascalName = char(Name.size()) + Name.str();
-    GroupNames.GetOrAddStringOffset(PascalName, false);
+    GroupNames.GetOrAddStringOffset(Name);
   }
 
   emitAllDiagArrays(DiagsInGroup, DiagsInPedantic, GroupsInPedantic, GroupNames,
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index 35477cfc3cf45..0ecbf7cede1da 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -1050,7 +1050,7 @@ std::string Intrinsic::replaceTemplatedArgs(std::string Name, TypeSpec TS,
     else if (T.isBFloat())
       TypeCode = "bf";
     else if (T.isMFloat())
-      TypeCode = "mfp";
+      TypeCode = "mf";
     else
       TypeCode = 'f';
     Ret.replace(Pos, NumChars, TypeCode + utostr(T.getElementSizeInBits()));
diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html
index a8e79cd3475ab..575055117b35f 100755
--- a/clang/www/cxx_status.html
+++ b/clang/www/cxx_status.html
@@ -439,8 +439,8 @@ <h2 id="cxx23">C++23 implementation status</h2>
     </tr>
     <tr>
       <td>Using unknown pointers and references in constant expressions</td>
-      <td><a href="https://wg21.link/P2280R4">P2280R4</a> (<a href="#dr">DR</a>)</td>
-      <td class="none" align="center">No</td>
+      <td><a href="https://wg21.link/P2280R4">P2280R4</a></td>
+      <td class="unreleased" align="center">Clang 20</td>
     </tr>
     <tr>
       <td>static <code>operator()</code></td>
diff --git a/compiler-rt/lib/builtins/arm/adddf3vfp.S b/compiler-rt/lib/builtins/arm/adddf3vfp.S
index 1a271db0847c2..280f5ab075638 100644
--- a/compiler-rt/lib/builtins/arm/adddf3vfp.S
+++ b/compiler-rt/lib/builtins/arm/adddf3vfp.S
@@ -19,10 +19,10 @@ DEFINE_COMPILERRT_FUNCTION(__adddf3vfp)
 #if defined(COMPILER_RT_ARMHF_TARGET)
 	vadd.f64 d0, d0, d1
 #else
-	vmov	d6, r0, r1		// move first param from r0/r1 pair into d6
-	vmov	d7, r2, r3		// move second param from r2/r3 pair into d7
+	VMOV_TO_DOUBLE(d6, r0, r1)		// move first param from r0/r1 pair into d6
+	VMOV_TO_DOUBLE(d7, r2, r3)		// move second param from r2/r3 pair into d7
 	vadd.f64 d6, d6, d7
-	vmov	r0, r1, d6		// move result back to r0/r1 pair
+	VMOV_FROM_DOUBLE(r0, r1, d6)		// move result back to r0/r1 pair
 #endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__adddf3vfp)
diff --git a/compiler-rt/lib/builtins/arm/aeabi_dcmp.S b/compiler-rt/lib/builtins/arm/aeabi_dcmp.S
index 5f720670ddd7c..bee14b3ff8afe 100644
--- a/compiler-rt/lib/builtins/arm/aeabi_dcmp.S
+++ b/compiler-rt/lib/builtins/arm/aeabi_dcmp.S
@@ -18,9 +18,9 @@
 // }
 
 #if defined(COMPILER_RT_ARMHF_TARGET)
-#  define CONVERT_DCMP_ARGS_TO_DF2_ARGS                    \
-        vmov      d0, r0, r1                     SEPARATOR \
-        vmov      d1, r2, r3
+#  define CONVERT_DCMP_ARGS_TO_DF2_ARGS \
+     VMOV_TO_DOUBLE(d0, r0, r1)         \
+     VMOV_TO_DOUBLE(d1, r2, r3)
 #else
 #  define CONVERT_DCMP_ARGS_TO_DF2_ARGS
 #endif
diff --git a/compiler-rt/lib/builtins/arm/divdf3vfp.S b/compiler-rt/lib/builtins/arm/divdf3vfp.S
index ad50b57a651db..c8c0aa84c192f 100644
--- a/compiler-rt/lib/builtins/arm/divdf3vfp.S
+++ b/compiler-rt/lib/builtins/arm/divdf3vfp.S
@@ -20,10 +20,10 @@ DEFINE_COMPILERRT_FUNCTION(__divdf3vfp)
 #if defined(COMPILER_RT_ARMHF_TARGET)
 	vdiv.f64 d0, d0, d1
 #else
-	vmov	d6, r0, r1		// move first param from r0/r1 pair into d6
-	vmov	d7, r2, r3		// move second param from r2/r3 pair into d7
+	VMOV_TO_DOUBLE(d6, r0, r1)		// move first param from r0/r1 pair into d6
+	VMOV_TO_DOUBLE(d7, r2, r3)		// move second param from r2/r3 pair into d7
 	vdiv.f64 d5, d6, d7
-	vmov	r0, r1, d5		// move result back to r0/r1 pair
+	VMOV_FROM_DOUBLE(r0, r1, d5)		// move result back to r0/r1 pair
 #endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__divdf3vfp)
diff --git a/compiler-rt/lib/builtins/arm/eqdf2vfp.S b/compiler-rt/lib/builtins/arm/eqdf2vfp.S
index 2a0a64b97e7d4..a6f341dc1f46d 100644
--- a/compiler-rt/lib/builtins/arm/eqdf2vfp.S
+++ b/compiler-rt/lib/builtins/arm/eqdf2vfp.S
@@ -20,8 +20,8 @@ DEFINE_COMPILERRT_FUNCTION(__eqdf2vfp)
 #if defined(COMPILER_RT_ARMHF_TARGET)
 	vcmp.f64 d0, d1
 #else
-	vmov	d6, r0, r1	// load r0/r1 pair in double register
-	vmov	d7, r2, r3	// load r2/r3 pair in double register
+	VMOV_TO_DOUBLE(d6, r0, r1)	// load r0/r1 pair in double register
+	VMOV_TO_DOUBLE(d7, r2, r3)	// load r2/r3 pair in double register
 	vcmp.f64 d6, d7
 #endif
 	vmrs	apsr_nzcv, fpscr
diff --git a/compiler-rt/lib/builtins/arm/extendsfdf2vfp.S b/compiler-rt/lib/builtins/arm/extendsfdf2vfp.S
index 37c8be8dcd9c7..815be830003a2 100644
--- a/compiler-rt/lib/builtins/arm/extendsfdf2vfp.S
+++ b/compiler-rt/lib/builtins/arm/extendsfdf2vfp.S
@@ -23,7 +23,7 @@ DEFINE_COMPILERRT_FUNCTION(__extendsfdf2vfp)
 #else
 	vmov	s15, r0      // load float register from R0
 	vcvt.f64.f32 d7, s15 // convert single to double
-	vmov	r0, r1, d7   // return result in r0/r1 pair
+	VMOV_FROM_DOUBLE(r0, r1, d7)   // return result in r0/r1 pair
 #endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__extendsfdf2vfp)
diff --git a/compiler-rt/lib/builtins/arm/fixdfsivfp.S b/compiler-rt/lib/builtins/arm/fixdfsivfp.S
index af1d4f4fa5f5e..d708f3f4d8051 100644
--- a/compiler-rt/lib/builtins/arm/fixdfsivfp.S
+++ b/compiler-rt/lib/builtins/arm/fixdfsivfp.S
@@ -22,7 +22,7 @@ DEFINE_COMPILERRT_FUNCTION(__fixdfsivfp)
 	vcvt.s32.f64 s0, d0
 	vmov r0, s0
 #else
-	vmov	d7, r0, r1    // load double register from R0/R1
+	VMOV_TO_DOUBLE(d7, r0, r1)    // load double register from R0/R1
 	vcvt.s32.f64 s15, d7  // convert double to 32-bit int into s15
 	vmov	r0, s15	      // move s15 to result register
 #endif
diff --git a/compiler-rt/lib/builtins/arm/fixunsdfsivfp.S b/compiler-rt/lib/builtins/arm/fixunsdfsivfp.S
index 44e6dbd4989e1..a3dda15e8c045 100644
--- a/compiler-rt/lib/builtins/arm/fixunsdfsivfp.S
+++ b/compiler-rt/lib/builtins/arm/fixunsdfsivfp.S
@@ -23,7 +23,7 @@ DEFINE_COMPILERRT_FUNCTION(__fixunsdfsivfp)
 	vcvt.u32.f64 s0, d0
 	vmov r0, s0
 #else
-	vmov	d7, r0, r1    // load double register from R0/R1
+	VMOV_TO_DOUBLE(d7, r0, r1)    // load double register from R0/R1
 	vcvt.u32.f64 s15, d7  // convert double to 32-bit int into s15
 	vmov	r0, s15	      // move s15 to result register
 #endif
diff --git a/compiler-rt/lib/builtins/arm/floatsidfvfp.S b/compiler-rt/lib/builtins/arm/floatsidfvfp.S
index ae8d2465889c6..d0fc5e8a4480b 100644
--- a/compiler-rt/lib/builtins/arm/floatsidfvfp.S
+++ b/compiler-rt/lib/builtins/arm/floatsidfvfp.S
@@ -24,7 +24,7 @@ DEFINE_COMPILERRT_FUNCTION(__floatsidfvfp)
 #else
 	vmov	s15, r0        // move int to float register s15
 	vcvt.f64.s32 d7, s15   // convert 32-bit int in s15 to double in d7
-	vmov	r0, r1, d7     // move d7 to result register pair r0/r1
+	VMOV_FROM_DOUBLE(r0, r1, d7)     // move d7 to result register pair r0/r1
 #endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__floatsidfvfp)
diff --git a/compiler-rt/lib/builtins/arm/floatunssidfvfp.S b/compiler-rt/lib/builtins/arm/floatunssidfvfp.S
index 0932dab2bdb9d..5acc2d5c0b25d 100644
--- a/compiler-rt/lib/builtins/arm/floatunssidfvfp.S
+++ b/compiler-rt/lib/builtins/arm/floatunssidfvfp.S
@@ -24,7 +24,7 @@ DEFINE_COMPILERRT_FUNCTION(__floatunssidfvfp)
 #else
 	vmov	s15, r0        // move int to float register s15
 	vcvt.f64.u32 d7, s15   // convert 32-bit int in s15 to double in d7
-	vmov	r0, r1, d7     // move d7 to result register pair r0/r1
+	VMOV_FROM_DOUBLE(r0, r1, d7) // move d7 to result register pair r0/r1
 #endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__floatunssidfvfp)
diff --git a/compiler-rt/lib/builtins/arm/gedf2vfp.S b/compiler-rt/lib/builtins/arm/gedf2vfp.S
index 2af9d909967b0..00746b891c997 100644
--- a/compiler-rt/lib/builtins/arm/gedf2vfp.S
+++ b/compiler-rt/lib/builtins/arm/gedf2vfp.S
@@ -21,8 +21,8 @@ DEFINE_COMPILERRT_FUNCTION(__gedf2vfp)
 #if defined(COMPILER_RT_ARMHF_TARGET)
 	vcmp.f64 d0, d1
 #else
-	vmov 	d6, r0, r1	// load r0/r1 pair in double register
-	vmov 	d7, r2, r3	// load r2/r3 pair in double register
+	VMOV_TO_DOUBLE(d6, r0, r1)	// load r0/r1 pair in double register
+	VMOV_TO_DOUBLE(d7, r2, r3)	// load r2/r3 pair in double register
 	vcmp.f64 d6, d7
 #endif
 	vmrs	apsr_nzcv, fpscr
diff --git a/compiler-rt/lib/builtins/arm/gtdf2vfp.S b/compiler-rt/lib/builtins/arm/gtdf2vfp.S
index 782ad8cac013d..980a09eb24b01 100644
--- a/compiler-rt/lib/builtins/arm/gtdf2vfp.S
+++ b/compiler-rt/lib/builtins/arm/gtdf2vfp.S
@@ -21,8 +21,8 @@ DEFINE_COMPILERRT_FUNCTION(__gtdf2vfp)
 #if defined(COMPILER_RT_ARMHF_TARGET)
 	vcmp.f64 d0, d1
 #else
-	vmov 	d6, r0, r1	// load r0/r1 pair in double register
-	vmov 	d7, r2, r3	// load r2/r3 pair in double register
+	VMOV_TO_DOUBLE(d6, r0, r1)	// load r0/r1 pair in double register
+	VMOV_TO_DOUBLE(d7, r2, r3)	// load r2/r3 pair in double register
 	vcmp.f64 d6, d7
 #endif
 	vmrs	apsr_nzcv, fpscr
diff --git a/compiler-rt/lib/builtins/arm/ledf2vfp.S b/compiler-rt/lib/builtins/arm/ledf2vfp.S
index 0097e4b6c129a..c7fe6d84535a4 100644
--- a/compiler-rt/lib/builtins/arm/ledf2vfp.S
+++ b/compiler-rt/lib/builtins/arm/ledf2vfp.S
@@ -21,8 +21,8 @@ DEFINE_COMPILERRT_FUNCTION(__ledf2vfp)
 #if defined(COMPILER_RT_ARMHF_TARGET)
 	vcmp.f64 d0, d1
 #else
-	vmov 	d6, r0, r1	// load r0/r1 pair in double register
-	vmov 	d7, r2, r3	// load r2/r3 pair in double register
+	VMOV_TO_DOUBLE(d6, r0, r1)	// load r0/r1 pair in double register
+	VMOV_TO_DOUBLE(d7, r2, r3)	// load r2/r3 pair in double register
 	vcmp.f64 d6, d7
 #endif
 	vmrs	apsr_nzcv, fpscr
diff --git a/compiler-rt/lib/builtins/arm/ltdf2vfp.S b/compiler-rt/lib/builtins/arm/ltdf2vfp.S
index a126aa9e0536a..be5827075f993 100644
--- a/compiler-rt/lib/builtins/arm/ltdf2vfp.S
+++ b/compiler-rt/lib/builtins/arm/ltdf2vfp.S
@@ -21,8 +21,8 @@ DEFINE_COMPILERRT_FUNCTION(__ltdf2vfp)
 #if defined(COMPILER_RT_ARMHF_TARGET)
 	vcmp.f64 d0, d1
 #else
-	vmov 	d6, r0, r1	// load r0/r1 pair in double register
-	vmov 	d7, r2, r3	// load r2/r3 pair in double register
+	VMOV_TO_DOUBLE(d6, r0, r1)	// load r0/r1 pair in double register
+	VMOV_TO_DOUBLE(d7, r2, r3)	// load r2/r3 pair in double register
 	vcmp.f64 d6, d7
 #endif
 	vmrs	apsr_nzcv, fpscr
diff --git a/compiler-rt/lib/builtins/arm/muldf3vfp.S b/compiler-rt/lib/builtins/arm/muldf3vfp.S
index 9adc937bcb3f0..97daf7363787b 100644
--- a/compiler-rt/lib/builtins/arm/muldf3vfp.S
+++ b/compiler-rt/lib/builtins/arm/muldf3vfp.S
@@ -20,10 +20,10 @@ DEFINE_COMPILERRT_FUNCTION(__muldf3vfp)
 #if defined(COMPILER_RT_ARMHF_TARGET)
 	vmul.f64 d0, d0, d1
 #else
-	vmov 	d6, r0, r1         // move first param from r0/r1 pair into d6
-	vmov 	d7, r2, r3         // move second param from r2/r3 pair into d7
+	VMOV_TO_DOUBLE(d6, r0, r1)         // move first param from r0/r1 pair into d6
+	VMOV_TO_DOUBLE(d7, r2, r3)         // move second param from r2/r3 pair into d7
 	vmul.f64 d6, d6, d7
-	vmov 	r0, r1, d6         // move result back to r0/r1 pair
+	VMOV_FROM_DOUBLE(r0, r1, d6)         // move result back to r0/r1 pair
 #endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__muldf3vfp)
diff --git a/compiler-rt/lib/builtins/arm/nedf2vfp.S b/compiler-rt/lib/builtins/arm/nedf2vfp.S
index 32d35c41d4664..5edafc25988db 100644
--- a/compiler-rt/lib/builtins/arm/nedf2vfp.S
+++ b/compiler-rt/lib/builtins/arm/nedf2vfp.S
@@ -20,8 +20,8 @@ DEFINE_COMPILERRT_FUNCTION(__nedf2vfp)
 #if defined(COMPILER_RT_ARMHF_TARGET)
 	vcmp.f64 d0, d1
 #else
-	vmov 	d6, r0, r1	// load r0/r1 pair in double register
-	vmov 	d7, r2, r3	// load r2/r3 pair in double register
+	VMOV_TO_DOUBLE(d6, r0, r1)	// load r0/r1 pair in double register
+	VMOV_TO_DOUBLE(d7, r2, r3)	// load r2/r3 pair in double register
 	vcmp.f64 d6, d7
 #endif
 	vmrs	apsr_nzcv, fpscr
diff --git a/compiler-rt/lib/builtins/arm/subdf3vfp.S b/compiler-rt/lib/builtins/arm/subdf3vfp.S
index f4eaf9af1afee..2a7b1d38b577f 100644
--- a/compiler-rt/lib/builtins/arm/subdf3vfp.S
+++ b/compiler-rt/lib/builtins/arm/subdf3vfp.S
@@ -20,10 +20,10 @@ DEFINE_COMPILERRT_FUNCTION(__subdf3vfp)
 #if defined(COMPILER_RT_ARMHF_TARGET)
 	vsub.f64 d0, d0, d1
 #else
-	vmov 	d6, r0, r1         // move first param from r0/r1 pair into d6
-	vmov 	d7, r2, r3         // move second param from r2/r3 pair into d7
+	VMOV_TO_DOUBLE(d6, r0, r1)         // move first param from r0/r1 pair into d6
+	VMOV_TO_DOUBLE(d7, r2, r3)         // move second param from r2/r3 pair into d7
 	vsub.f64 d6, d6, d7
-	vmov 	r0, r1, d6         // move result back to r0/r1 pair
+	VMOV_FROM_DOUBLE(r0, r1, d6)         // move result back to r0/r1 pair
 #endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__subdf3vfp)
diff --git a/compiler-rt/lib/builtins/arm/truncdfsf2vfp.S b/compiler-rt/lib/builtins/arm/truncdfsf2vfp.S
index e1c171262a78b..541d025b4f922 100644
--- a/compiler-rt/lib/builtins/arm/truncdfsf2vfp.S
+++ b/compiler-rt/lib/builtins/arm/truncdfsf2vfp.S
@@ -21,7 +21,7 @@ DEFINE_COMPILERRT_FUNCTION(__truncdfsf2vfp)
 #if defined(COMPILER_RT_ARMHF_TARGET)
 	vcvt.f32.f64 s0, d0
 #else
-	vmov 	d7, r0, r1   // load double from r0/r1 pair
+	VMOV_TO_DOUBLE(d7, r0, r1)   // load double from r0/r1 pair
 	vcvt.f32.f64 s15, d7 // convert double to single (trucate precision)
 	vmov 	r0, s15      // return result in r0
 #endif
diff --git a/compiler-rt/lib/builtins/arm/unorddf2vfp.S b/compiler-rt/lib/builtins/arm/unorddf2vfp.S
index ea36a1cb55949..3abb622c81ecb 100644
--- a/compiler-rt/lib/builtins/arm/unorddf2vfp.S
+++ b/compiler-rt/lib/builtins/arm/unorddf2vfp.S
@@ -21,8 +21,8 @@ DEFINE_COMPILERRT_FUNCTION(__unorddf2vfp)
 #if defined(COMPILER_RT_ARMHF_TARGET)
 	vcmp.f64 d0, d1
 #else
-	vmov 	d6, r0, r1	// load r0/r1 pair in double register
-	vmov 	d7, r2, r3	// load r2/r3 pair in double register
+	VMOV_TO_DOUBLE(d6, r0, r1)	// load r0/r1 pair in double register
+	VMOV_TO_DOUBLE(d7, r2, r3)	// load r2/r3 pair in double register
 	vcmp.f64 d6, d7
 #endif
 	vmrs	apsr_nzcv, fpscr
diff --git a/compiler-rt/lib/builtins/assembly.h b/compiler-rt/lib/builtins/assembly.h
index 8c42fc773483b..34c71241524dc 100644
--- a/compiler-rt/lib/builtins/assembly.h
+++ b/compiler-rt/lib/builtins/assembly.h
@@ -290,4 +290,16 @@
   CFI_END
 #endif
 
+#ifdef __arm__
+#include "int_endianness.h"
+
+#if _YUGA_BIG_ENDIAN
+#define VMOV_TO_DOUBLE(dst, src0, src1) vmov dst, src1, src0 SEPARATOR
+#define VMOV_FROM_DOUBLE(dst0, dst1, src) vmov dst1, dst0, src SEPARATOR
+#else
+#define VMOV_TO_DOUBLE(dst, src0, src1) vmov dst, src0, src1 SEPARATOR
+#define VMOV_FROM_DOUBLE(dst0, dst1, src) vmov dst0, dst1, src SEPARATOR
+#endif
+#endif
+
 #endif // COMPILERRT_ASSEMBLY_H
diff --git a/compiler-rt/lib/interception/interception_win.cpp b/compiler-rt/lib/interception/interception_win.cpp
index 0841161ee5c42..64004c171d534 100644
--- a/compiler-rt/lib/interception/interception_win.cpp
+++ b/compiler-rt/lib/interception/interception_win.cpp
@@ -606,6 +606,7 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
 
     case 0xb8:  // b8 XX XX XX XX : mov eax, XX XX XX XX
     case 0xB9:  // b9 XX XX XX XX : mov ecx, XX XX XX XX
+    case 0xBA:  // ba XX XX XX XX : mov edx, XX XX XX XX
       return 5;
 
     // Cannot overwrite control-instruction. Return 0 to indicate failure.
@@ -666,6 +667,9 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
   }
 
   switch (0x00FFFFFF & *(u32 *)address) {
+    case 0x244C8D:  // 8D 4C 24 XX : lea ecx, [esp + XX]
+    case 0x2474FF:  // FF 74 24 XX : push qword ptr [rsp + XX]
+      return 4;
     case 0x24A48D:  // 8D A4 24 XX XX XX XX : lea esp, [esp + XX XX XX XX]
       return 7;
   }
@@ -829,8 +833,13 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0x588948:    // 48 89 58 XX : mov QWORD PTR[rax + XX], rbx
     case 0xec8348:    // 48 83 ec XX : sub rsp, XX
     case 0xf88349:    // 49 83 f8 XX : cmp r8, XX
+    case 0x488d49:    // 49 8d 48 XX : lea rcx, [...]
+    case 0x048d4c:    // 4c 8d 04 XX : lea r8, [...]
+    case 0x148d4e:    // 4e 8d 14 XX : lea r10, [...]
+    case 0x398366:    // 66 83 39 XX : cmp WORD PTR [rcx], XX
       return 4;
 
+    case 0x441F0F:  // 0F 1F 44 XX XX :   nop DWORD PTR [...]
     case 0x246483:  // 83 64 24 XX YY :   and    DWORD PTR [rsp+XX], YY
       return 5;
 
@@ -883,6 +892,10 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
   }
 
   switch (*(u32*)(address)) {
+    case 0x01b60f44:  // 44 0f b6 01 : movzx r8d, BYTE PTR [rcx]
+    case 0x09b60f44:  // 44 0f b6 09 : movzx r9d, BYTE PTR [rcx]
+    case 0x0ab60f44:  // 44 0f b6 0a : movzx r9d, BYTE PTR [rdx]
+    case 0x11b60f44:  // 44 0f b6 11 : movzx r10d, BYTE PTR [rcx]
     case 0x1ab60f44:  // 44 0f b6 1a : movzx r11d, BYTE PTR [rdx]
       return 4;
     case 0x24448b48:  // 48 8b 44 24 XX : mov rax, QWORD ptr [rsp + XX]
@@ -902,6 +915,11 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
       return 6;
   }
 
+  switch (0xFFFFFFFFFFULL & *(u64 *)(address)) {
+    case 0xC07E0F4866:  // 66 48 0F 7E C0 : movq rax, xmm0
+      return 5;
+  }
+
 #else
 
   switch (*(u8*)address) {
diff --git a/compiler-rt/lib/interception/tests/interception_win_test.cpp b/compiler-rt/lib/interception/tests/interception_win_test.cpp
index 9d8dbfcb4285f..c5dcf26070f0d 100644
--- a/compiler-rt/lib/interception/tests/interception_win_test.cpp
+++ b/compiler-rt/lib/interception/tests/interception_win_test.cpp
@@ -864,9 +864,12 @@ const struct InstructionSizeData {
     { 3, {0x8B, 0x55, 0x72}, 0, "8B 55 XX : mov XX(%ebp), edx"},
     { 3, {0x8B, 0x75, 0x72}, 0, "8B 75 XX : mov XX(%ebp), esp"},
     { 3, {0xc2, 0x71, 0x72}, 0, "C2 XX XX : ret XX (needed for registering weak functions)"},
+    { 4, {0x8D, 0x4C, 0x24, 0x73}, 0, "8D 4C 24 XX : lea ecx, [esp + XX]"},
+    { 4, {0xFF, 0x74, 0x24, 0x73}, 0, "FF 74 24 XX : push qword ptr [rsp + XX]"},
     { 5, {0x68, 0x71, 0x72, 0x73, 0x74}, 0, "68 XX XX XX XX : push imm32"},
     { 5, {0xb8, 0x71, 0x72, 0x73, 0x74}, 0, "b8 XX XX XX XX : mov eax, XX XX XX XX"},
     { 5, {0xB9, 0x71, 0x72, 0x73, 0x74}, 0, "b9 XX XX XX XX : mov ecx, XX XX XX XX"},
+    { 5, {0xBA, 0x71, 0x72, 0x73, 0x74}, 0, "ba XX XX XX XX : mov edx, XX XX XX XX"},
     { 7, {0x8D, 0xA4, 0x24, 0x73, 0x74, 0x75, 0x76}, 0, "8D A4 24 XX XX XX XX : lea esp, [esp + XX XX XX XX]"},
 #if SANITIZER_WINDOWS_x64
     // sorted list
@@ -961,11 +964,19 @@ const struct InstructionSizeData {
     { 3, {0x4d, 0x85, 0xf6}, 0, "4d 85 f6 : test r14, r14"},
     { 3, {0x4d, 0x85, 0xff}, 0, "4d 85 ff : test r15, r15"},
     { 3, {0xf6, 0xc1, 0x72}, 0, "f6 c1 XX : test cl, XX"},
+    { 4, {0x44, 0x0f, 0xb6, 0x01}, 0, "44 0f b6 01 : movzx r8d, BYTE PTR [rcx]"},
+    { 4, {0x44, 0x0f, 0xb6, 0x09}, 0, "44 0f b6 09 : movzx r9d, BYTE PTR [rcx]"},
+    { 4, {0x44, 0x0f, 0xb6, 0x0a}, 0, "44 0f b6 0a : movzx r8d, BYTE PTR [rdx]"},
+    { 4, {0x44, 0x0f, 0xb6, 0x11}, 0, "44 0f b6 11 : movzx r10d, BYTE PTR [rcx]"},
     { 4, {0x44, 0x0f, 0xb6, 0x1a}, 0, "44 0f b6 1a : movzx r11d, BYTE PTR [rdx]"},
     { 4, {0x44, 0x8d, 0x42, 0x73}, 0, "44 8d 42 XX : lea r8d , [rdx + XX]"},
     { 4, {0x48, 0x83, 0xec, 0x73}, 0, "48 83 ec XX : sub rsp, XX"},
     { 4, {0x48, 0x89, 0x58, 0x73}, 0, "48 89 58 XX : mov QWORD PTR[rax + XX], rbx"},
     { 4, {0x49, 0x83, 0xf8, 0x73}, 0, "49 83 f8 XX : cmp r8, XX"},
+    { 4, {0x49, 0x8d, 0x48, 0x73}, 0, "49 8d 48 XX : lea rcx, [...]"},
+    { 4, {0x4c, 0x8d, 0x04, 0x73}, 0, "4c 8d 04 XX : lea r8, [...]"},
+    { 4, {0x4e, 0x8d, 0x14, 0x73}, 0, "4e 8d 14 XX : lea r10, [...]"},
+    { 4, {0x66, 0x83, 0x39, 0x73}, 0, "66 83 39 XX : cmp WORD PTR [rcx], XX"},
     { 4, {0x80, 0x78, 0x72, 0x73}, 0, "80 78 YY XX : cmp BYTE PTR [rax+YY], XX"},
     { 4, {0x80, 0x79, 0x72, 0x73}, 0, "80 79 YY XX : cmp BYTE ptr [rcx+YY], XX"},
     { 4, {0x80, 0x7A, 0x72, 0x73}, 0, "80 7A YY XX : cmp BYTE PTR [rdx+YY], XX"},
@@ -973,6 +984,7 @@ const struct InstructionSizeData {
     { 4, {0x80, 0x7D, 0x72, 0x73}, 0, "80 7D YY XX : cmp BYTE PTR [rbp+YY], XX"},
     { 4, {0x80, 0x7E, 0x72, 0x73}, 0, "80 7E YY XX : cmp BYTE PTR [rsi+YY], XX"},
     { 4, {0x89, 0x54, 0x24, 0x73}, 0, "89 54 24 XX : mov DWORD PTR[rsp + XX], edx"},
+    { 5, {0x0F, 0x1F, 0x44, 0x73, 0x74}, 0, "0F 1F 44 XX XX : nop DWORD PTR [...]"},
     { 5, {0x44, 0x89, 0x44, 0x24, 0x74}, 0, "44 89 44 24 XX : mov DWORD PTR [rsp + XX], r8d"},
     { 5, {0x44, 0x89, 0x4c, 0x24, 0x74}, 0, "44 89 4c 24 XX : mov DWORD PTR [rsp + XX], r9d"},
     { 5, {0x48, 0x89, 0x4C, 0x24, 0x74}, 0, "48 89 4C 24 XX : mov QWORD PTR [rsp + XX], rcx"},
@@ -985,6 +997,7 @@ const struct InstructionSizeData {
     { 5, {0x48, 0x8d, 0x6c, 0x24, 0x74}, 0, "48 8d 6c 24 XX : lea rbp, [rsp + XX]"},
     { 5, {0x4c, 0x89, 0x44, 0x24, 0x74}, 0, "4c 89 44 24 XX : mov QWORD PTR [rsp + XX], r8"},
     { 5, {0x4c, 0x89, 0x4c, 0x24, 0x74}, 0, "4c 89 4c 24 XX : mov QWORD PTR [rsp + XX], r9"},
+    { 5, {0x66, 0x48, 0x0F, 0x7E, 0xC0}, 0, "66 48 0F 7E C0 : movq rax, xmm0"},
     { 5, {0x83, 0x44, 0x72, 0x73, 0x74}, 0, "83 44 72 XX YY : add DWORD PTR [rdx+rsi*2+XX],YY"},
     { 5, {0x83, 0x64, 0x24, 0x73, 0x74}, 0, "83 64 24 XX YY : and DWORD PTR [rsp+XX], YY"},
     { 6, {0x48, 0x83, 0x64, 0x24, 0x74, 0x75}, 0, "48 83 64 24 XX YY : and QWORD PTR [rsp + XX], YY"},
diff --git a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp
index 34c2d4cb37fd0..71938d3edba38 100644
--- a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp
+++ b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp
@@ -808,6 +808,35 @@ INTERCEPTOR(int, munmap, void *addr, size_t length) {
   return REAL(munmap)(addr, length);
 }
 
+INTERCEPTOR(int, madvise, void *addr, size_t length, int flag) {
+  __rtsan_notify_intercepted_call("madvise");
+  return REAL(madvise)(addr, length, flag);
+}
+
+INTERCEPTOR(int, posix_madvise, void *addr, size_t length, int flag) {
+  __rtsan_notify_intercepted_call("posix_madvise");
+  return REAL(posix_madvise)(addr, length, flag);
+}
+
+INTERCEPTOR(int, mprotect, void *addr, size_t length, int prot) {
+  __rtsan_notify_intercepted_call("mprotect");
+  return REAL(mprotect)(addr, length, prot);
+}
+
+INTERCEPTOR(int, msync, void *addr, size_t length, int flag) {
+  __rtsan_notify_intercepted_call("msync");
+  return REAL(msync)(addr, length, flag);
+}
+
+#if SANITIZER_APPLE
+INTERCEPTOR(int, mincore, const void *addr, size_t length, char *vec) {
+#else
+INTERCEPTOR(int, mincore, void *addr, size_t length, unsigned char *vec) {
+#endif
+  __rtsan_notify_intercepted_call("mincore");
+  return REAL(mincore)(addr, length, vec);
+}
+
 INTERCEPTOR(int, shm_open, const char *name, int oflag, mode_t mode) {
   __rtsan_notify_intercepted_call("shm_open");
   return REAL(shm_open)(name, oflag, mode);
@@ -894,8 +923,13 @@ INTERCEPTOR(ssize_t, sendmsg, int socket, const struct msghdr *message,
 }
 
 #if SANITIZER_INTERCEPT_SENDMMSG
+#if SANITIZER_MUSL
+INTERCEPTOR(int, sendmmsg, int socket, struct mmsghdr *message,
+            unsigned int len, unsigned int flags) {
+#else
 INTERCEPTOR(int, sendmmsg, int socket, struct mmsghdr *message,
             unsigned int len, int flags) {
+#endif
   __rtsan_notify_intercepted_call("sendmmsg");
   return REAL(sendmmsg)(socket, message, len, flags);
 }
@@ -927,8 +961,16 @@ INTERCEPTOR(ssize_t, recvmsg, int socket, struct msghdr *message, int flags) {
 }
 
 #if SANITIZER_INTERCEPT_RECVMMSG
+#if SANITIZER_MUSL
+INTERCEPTOR(int, recvmmsg, int socket, struct mmsghdr *message,
+            unsigned int len, unsigned int flags, struct timespec *timeout) {
+#elif defined(__GLIBC_MINOR__) && __GLIBC_MINOR__ < 21
+INTERCEPTOR(int, recvmmsg, int socket, struct mmsghdr *message,
+            unsigned int len, int flags, const struct timespec *timeout) {
+#else
 INTERCEPTOR(int, recvmmsg, int socket, struct mmsghdr *message,
             unsigned int len, int flags, struct timespec *timeout) {
+#endif // defined(__GLIBC_MINOR__) && __GLIBC_MINOR__ < 21
   __rtsan_notify_intercepted_call("recvmmsg");
   return REAL(recvmmsg)(socket, message, len, flags, timeout);
 }
@@ -1093,6 +1135,32 @@ INTERCEPTOR(int, execve, const char *filename, char *const argv[],
   return REAL(execve)(filename, argv, envp);
 }
 
+#if SANITIZER_INTERCEPT_PROCESS_VM_READV
+INTERCEPTOR(ssize_t, process_vm_readv, pid_t pid, const struct iovec *local_iov,
+            unsigned long liovcnt, const struct iovec *remote_iov,
+            unsigned long riovcnt, unsigned long flags) {
+  __rtsan_notify_intercepted_call("process_vm_readv");
+  return REAL(process_vm_readv)(pid, local_iov, liovcnt, remote_iov, riovcnt,
+                                flags);
+}
+
+INTERCEPTOR(ssize_t, process_vm_writev, pid_t pid,
+            const struct iovec *local_iov, unsigned long liovcnt,
+            const struct iovec *remote_iov, unsigned long riovcnt,
+            unsigned long flags) {
+  __rtsan_notify_intercepted_call("process_vm_writev");
+  return REAL(process_vm_writev)(pid, local_iov, liovcnt, remote_iov, riovcnt,
+                                 flags);
+}
+#define RTSAN_MAYBE_INTERCEPT_PROCESS_VM_READV                                 \
+  INTERCEPT_FUNCTION(process_vm_readv)
+#define RTSAN_MAYBE_INTERCEPT_PROCESS_VM_WRITEV                                \
+  INTERCEPT_FUNCTION(process_vm_writev)
+#else
+#define RTSAN_MAYBE_INTERCEPT_PROCESS_VM_READV
+#define RTSAN_MAYBE_INTERCEPT_PROCESS_VM_WRITEV
+#endif
+
 // TODO: the `wait` family of functions is an oddity. In testing, if you
 // intercept them, Darwin seemingly ignores them, and linux never returns from
 // the test. Revisit this in the future, but hopefully intercepting fork/exec is
@@ -1148,6 +1216,11 @@ void __rtsan::InitializeInterceptors() {
   INTERCEPT_FUNCTION(mmap);
   RTSAN_MAYBE_INTERCEPT_MMAP64;
   INTERCEPT_FUNCTION(munmap);
+  INTERCEPT_FUNCTION(madvise);
+  INTERCEPT_FUNCTION(posix_madvise);
+  INTERCEPT_FUNCTION(mprotect);
+  INTERCEPT_FUNCTION(msync);
+  INTERCEPT_FUNCTION(mincore);
   INTERCEPT_FUNCTION(shm_open);
   INTERCEPT_FUNCTION(shm_unlink);
   RTSAN_MAYBE_INTERCEPT_MEMALIGN;
@@ -1272,6 +1345,9 @@ void __rtsan::InitializeInterceptors() {
   INTERCEPT_FUNCTION(fork);
   INTERCEPT_FUNCTION(execve);
 
+  RTSAN_MAYBE_INTERCEPT_PROCESS_VM_READV;
+  RTSAN_MAYBE_INTERCEPT_PROCESS_VM_WRITEV;
+
   INTERCEPT_FUNCTION(syscall);
 }
 
diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp
index c858a5a771fe4..0a59ae0ea9254 100644
--- a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp
+++ b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp
@@ -204,6 +204,75 @@ TEST(TestRtsanInterceptors, MunmapDiesWhenRealtime) {
   ExpectNonRealtimeSurvival(Func);
 }
 
+class RtsanOpenedMmapTest : public RtsanFileTest {
+protected:
+  void SetUp() override {
+    RtsanFileTest::SetUp();
+    file = fopen(GetTemporaryFilePath(), "w+");
+    ASSERT_THAT(file, Ne(nullptr));
+    fd = fileno(file);
+    ASSERT_THAT(fd, Ne(-1));
+    int ret = ftruncate(GetOpenFd(), size);
+    ASSERT_THAT(ret, Ne(-1));
+    addr =
+        mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, GetOpenFd(), 0);
+    ASSERT_THAT(addr, Ne(MAP_FAILED));
+    ASSERT_THAT(addr, Ne(nullptr));
+  }
+
+  void TearDown() override {
+    if (addr != nullptr && addr != MAP_FAILED)
+      munmap(addr, size);
+    RtsanFileTest::TearDown();
+  }
+
+  void *GetAddr() { return addr; }
+  static constexpr size_t GetSize() { return size; }
+
+  int GetOpenFd() { return fd; }
+
+private:
+  void *addr = nullptr;
+  static constexpr size_t size = 4096;
+  FILE *file = nullptr;
+  int fd = -1;
+};
+
+TEST_F(RtsanOpenedMmapTest, MadviseDiesWhenRealtime) {
+  auto Func = [this]() { madvise(GetAddr(), GetSize(), MADV_NORMAL); };
+  ExpectRealtimeDeath(Func, "madvise");
+  ExpectNonRealtimeSurvival(Func);
+}
+
+TEST_F(RtsanOpenedMmapTest, PosixMadviseDiesWhenRealtime) {
+  auto Func = [this]() { posix_madvise(GetAddr(), GetSize(), MADV_NORMAL); };
+  ExpectRealtimeDeath(Func, "posix_madvise");
+  ExpectNonRealtimeSurvival(Func);
+}
+
+TEST_F(RtsanOpenedMmapTest, MprotectDiesWhenRealtime) {
+  auto Func = [this]() { mprotect(GetAddr(), GetSize(), PROT_READ); };
+  ExpectRealtimeDeath(Func, "mprotect");
+  ExpectNonRealtimeSurvival(Func);
+}
+
+TEST_F(RtsanOpenedMmapTest, MsyncDiesWhenRealtime) {
+  auto Func = [this]() { msync(GetAddr(), GetSize(), MS_INVALIDATE); };
+  ExpectRealtimeDeath(Func, "msync");
+  ExpectNonRealtimeSurvival(Func);
+}
+
+TEST_F(RtsanOpenedMmapTest, MincoreDiesWhenRealtime) {
+#if SANITIZER_APPLE
+  std::vector<char> vec(GetSize() / 1024);
+#else
+  std::vector<unsigned char> vec(GetSize() / 1024);
+#endif
+  auto Func = [this, &vec]() { mincore(GetAddr(), GetSize(), vec.data()); };
+  ExpectRealtimeDeath(Func, "mincore");
+  ExpectNonRealtimeSurvival(Func);
+}
+
 TEST(TestRtsanInterceptors, ShmOpenDiesWhenRealtime) {
   auto Func = []() { shm_open("/rtsan_test_shm", O_CREAT | O_RDWR, 0); };
   ExpectRealtimeDeath(Func, "shm_open");
@@ -657,6 +726,28 @@ TEST(TestRtsanInterceptors, UmaskDiesWhenRealtime) {
   ExpectNonRealtimeSurvival(Func);
 }
 
+#if SANITIZER_INTERCEPT_PROCESS_VM_READV
+TEST(TestRtsanInterceptors, ProcessVmReadvDiesWhenRealtime) {
+  char stack[1024];
+  int p;
+  iovec lcl{&stack, sizeof(stack)};
+  iovec rmt{&p, sizeof(p)};
+  auto Func = [&lcl, &rmt]() { process_vm_readv(0, &lcl, 1, &rmt, 1, 0); };
+  ExpectRealtimeDeath(Func, "process_vm_readv");
+  ExpectNonRealtimeSurvival(Func);
+}
+
+TEST(TestRtsanInterceptors, ProcessVmWritevDiesWhenRealtime) {
+  char stack[1024];
+  int p;
+  iovec lcl{&p, sizeof(p)};
+  iovec rmt{&stack, sizeof(stack)};
+  auto Func = [&lcl, &rmt]() { process_vm_writev(0, &lcl, 1, &rmt, 1, 0); };
+  ExpectRealtimeDeath(Func, "process_vm_writev");
+  ExpectNonRealtimeSurvival(Func);
+}
+#endif
+
 class RtsanDirectoryTest : public ::testing::Test {
 protected:
   void SetUp() override {
diff --git a/compiler-rt/test/asan/TestCases/Posix/fake_stack_gc.cpp b/compiler-rt/test/asan/TestCases/Posix/fake_stack_gc.cpp
index 8c368b9b1b947..36fdf81120b59 100644
--- a/compiler-rt/test/asan/TestCases/Posix/fake_stack_gc.cpp
+++ b/compiler-rt/test/asan/TestCases/Posix/fake_stack_gc.cpp
@@ -89,6 +89,7 @@ int main(void) {
 
   pthread_t tid;
   assert(pthread_create(&tid, &attr, Thread, alt_stack) == 0);
+  assert(pthread_attr_destroy(&attr) == 0);
 
   pthread_join(tid, nullptr);
 
diff --git a/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp b/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp
index 593114bdf2e8d..8e7d5082d0b5d 100644
--- a/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp
+++ b/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp
@@ -159,6 +159,7 @@ int main() {
   pthread_attr_init(&ThreadAttr);
   pthread_attr_setstack(&ThreadAttr, Mapping, DefaultStackSize);
   pthread_create(&Thread, &ThreadAttr, &threadFun, (void *)&AltStack);
+  pthread_attr_destroy(&ThreadAttr);
 
   pthread_join(Thread, nullptr);
 
diff --git a/compiler-rt/test/lsan/TestCases/leak_check_before_thread_started.cpp b/compiler-rt/test/lsan/TestCases/leak_check_before_thread_started.cpp
index 68eea93a81e57..d0363a0bf85ae 100644
--- a/compiler-rt/test/lsan/TestCases/leak_check_before_thread_started.cpp
+++ b/compiler-rt/test/lsan/TestCases/leak_check_before_thread_started.cpp
@@ -37,6 +37,7 @@ void create_detached_thread() {
   pthread_mutex_lock(&mutex);
   int res = pthread_create(&thread_id, &attr, func, arg);
   assert(res == 0);
+  pthread_attr_destroy(&attr);
 }
 
 int main() {
diff --git a/flang/include/flang/Lower/DirectivesCommon.h b/flang/include/flang/Lower/DirectivesCommon.h
index 6e2c6ee4b1bcd..c7cac1357b227 100644
--- a/flang/include/flang/Lower/DirectivesCommon.h
+++ b/flang/include/flang/Lower/DirectivesCommon.h
@@ -29,12 +29,9 @@
 #include "flang/Lower/PFTBuilder.h"
 #include "flang/Lower/StatementContext.h"
 #include "flang/Lower/Support/Utils.h"
-#include "flang/Optimizer/Builder/BoxValue.h"
-#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/DirectivesCommon.h"
 #include "flang/Optimizer/Builder/HLFIRTools.h"
-#include "flang/Optimizer/Builder/Todo.h"
 #include "flang/Optimizer/Dialect/FIRType.h"
-#include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Parser/parse-tree.h"
 #include "flang/Semantics/openmp-directive-sets.h"
 #include "flang/Semantics/tools.h"
@@ -49,31 +46,6 @@
 namespace Fortran {
 namespace lower {
 
-/// Information gathered to generate bounds operation and data entry/exit
-/// operations.
-struct AddrAndBoundsInfo {
-  explicit AddrAndBoundsInfo() {}
-  explicit AddrAndBoundsInfo(mlir::Value addr, mlir::Value rawInput)
-      : addr(addr), rawInput(rawInput) {}
-  explicit AddrAndBoundsInfo(mlir::Value addr, mlir::Value rawInput,
-                             mlir::Value isPresent)
-      : addr(addr), rawInput(rawInput), isPresent(isPresent) {}
-  explicit AddrAndBoundsInfo(mlir::Value addr, mlir::Value rawInput,
-                             mlir::Value isPresent, mlir::Type boxType)
-      : addr(addr), rawInput(rawInput), isPresent(isPresent), boxType(boxType) {
-  }
-  mlir::Value addr = nullptr;
-  mlir::Value rawInput = nullptr;
-  mlir::Value isPresent = nullptr;
-  mlir::Type boxType = nullptr;
-  void dump(llvm::raw_ostream &os) {
-    os << "AddrAndBoundsInfo addr: " << addr << "\n";
-    os << "AddrAndBoundsInfo rawInput: " << rawInput << "\n";
-    os << "AddrAndBoundsInfo isPresent: " << isPresent << "\n";
-    os << "AddrAndBoundsInfo boxType: " << boxType << "\n";
-  }
-};
-
 /// Populates \p hint and \p memoryOrder with appropriate clause information
 /// if present on atomic construct.
 static inline void genOmpAtomicHintAndMemoryOrderClauses(
@@ -609,195 +581,13 @@ void createEmptyRegionBlocks(
   }
 }
 
-inline AddrAndBoundsInfo getDataOperandBaseAddr(fir::FirOpBuilder &builder,
-                                                mlir::Value symAddr,
-                                                bool isOptional,
-                                                mlir::Location loc) {
-  mlir::Value rawInput = symAddr;
-  if (auto declareOp =
-          mlir::dyn_cast_or_null<hlfir::DeclareOp>(symAddr.getDefiningOp())) {
-    symAddr = declareOp.getResults()[0];
-    rawInput = declareOp.getResults()[1];
-  }
-
-  if (!symAddr)
-    llvm::report_fatal_error("could not retrieve symbol address");
-
-  mlir::Value isPresent;
-  if (isOptional)
-    isPresent =
-        builder.create<fir::IsPresentOp>(loc, builder.getI1Type(), rawInput);
-
-  if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(
-          fir::unwrapRefType(symAddr.getType()))) {
-    if (mlir::isa<fir::RecordType>(boxTy.getEleTy()))
-      TODO(loc, "derived type");
-
-    // In case of a box reference, load it here to get the box value.
-    // This is preferrable because then the same box value can then be used for
-    // all address/dimension retrievals. For Fortran optional though, leave
-    // the load generation for later so it can be done in the appropriate
-    // if branches.
-    if (mlir::isa<fir::ReferenceType>(symAddr.getType()) && !isOptional) {
-      mlir::Value addr = builder.create<fir::LoadOp>(loc, symAddr);
-      return AddrAndBoundsInfo(addr, rawInput, isPresent, boxTy);
-    }
-
-    return AddrAndBoundsInfo(symAddr, rawInput, isPresent, boxTy);
-  }
-  return AddrAndBoundsInfo(symAddr, rawInput, isPresent);
-}
-
-inline AddrAndBoundsInfo
+inline fir::factory::AddrAndBoundsInfo
 getDataOperandBaseAddr(Fortran::lower::AbstractConverter &converter,
                        fir::FirOpBuilder &builder,
                        Fortran::lower::SymbolRef sym, mlir::Location loc) {
-  return getDataOperandBaseAddr(builder, converter.getSymbolAddress(sym),
-                                Fortran::semantics::IsOptional(sym), loc);
-}
-
-template <typename BoundsOp, typename BoundsType>
-llvm::SmallVector<mlir::Value>
-gatherBoundsOrBoundValues(fir::FirOpBuilder &builder, mlir::Location loc,
-                          fir::ExtendedValue dataExv, mlir::Value box,
-                          bool collectValuesOnly = false) {
-  assert(box && "box must exist");
-  llvm::SmallVector<mlir::Value> values;
-  mlir::Value byteStride;
-  mlir::Type idxTy = builder.getIndexType();
-  mlir::Type boundTy = builder.getType<BoundsType>();
-  mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
-  for (unsigned dim = 0; dim < dataExv.rank(); ++dim) {
-    mlir::Value d = builder.createIntegerConstant(loc, idxTy, dim);
-    mlir::Value baseLb =
-        fir::factory::readLowerBound(builder, loc, dataExv, dim, one);
-    auto dimInfo =
-        builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, box, d);
-    mlir::Value lb = builder.createIntegerConstant(loc, idxTy, 0);
-    mlir::Value ub =
-        builder.create<mlir::arith::SubIOp>(loc, dimInfo.getExtent(), one);
-    if (dim == 0) // First stride is the element size.
-      byteStride = dimInfo.getByteStride();
-    if (collectValuesOnly) {
-      values.push_back(lb);
-      values.push_back(ub);
-      values.push_back(dimInfo.getExtent());
-      values.push_back(byteStride);
-      values.push_back(baseLb);
-    } else {
-      mlir::Value bound = builder.create<BoundsOp>(
-          loc, boundTy, lb, ub, dimInfo.getExtent(), byteStride, true, baseLb);
-      values.push_back(bound);
-    }
-    // Compute the stride for the next dimension.
-    byteStride = builder.create<mlir::arith::MulIOp>(loc, byteStride,
-                                                     dimInfo.getExtent());
-  }
-  return values;
-}
-
-/// Generate the bounds operation from the descriptor information.
-template <typename BoundsOp, typename BoundsType>
-llvm::SmallVector<mlir::Value>
-genBoundsOpsFromBox(fir::FirOpBuilder &builder, mlir::Location loc,
-                    fir::ExtendedValue dataExv,
-                    Fortran::lower::AddrAndBoundsInfo &info) {
-  llvm::SmallVector<mlir::Value> bounds;
-  mlir::Type idxTy = builder.getIndexType();
-  mlir::Type boundTy = builder.getType<BoundsType>();
-
-  assert(mlir::isa<fir::BaseBoxType>(info.boxType) &&
-         "expect fir.box or fir.class");
-  assert(fir::unwrapRefType(info.addr.getType()) == info.boxType &&
-         "expected box type consistency");
-
-  if (info.isPresent) {
-    llvm::SmallVector<mlir::Type> resTypes;
-    constexpr unsigned nbValuesPerBound = 5;
-    for (unsigned dim = 0; dim < dataExv.rank() * nbValuesPerBound; ++dim)
-      resTypes.push_back(idxTy);
-
-    mlir::Operation::result_range ifRes =
-        builder.genIfOp(loc, resTypes, info.isPresent, /*withElseRegion=*/true)
-            .genThen([&]() {
-              mlir::Value box =
-                  !fir::isBoxAddress(info.addr.getType())
-                      ? info.addr
-                      : builder.create<fir::LoadOp>(loc, info.addr);
-              llvm::SmallVector<mlir::Value> boundValues =
-                  gatherBoundsOrBoundValues<BoundsOp, BoundsType>(
-                      builder, loc, dataExv, box,
-                      /*collectValuesOnly=*/true);
-              builder.create<fir::ResultOp>(loc, boundValues);
-            })
-            .genElse([&] {
-              // Box is not present. Populate bound values with default values.
-              llvm::SmallVector<mlir::Value> boundValues;
-              mlir::Value zero = builder.createIntegerConstant(loc, idxTy, 0);
-              mlir::Value mOne = builder.createMinusOneInteger(loc, idxTy);
-              for (unsigned dim = 0; dim < dataExv.rank(); ++dim) {
-                boundValues.push_back(zero); // lb
-                boundValues.push_back(mOne); // ub
-                boundValues.push_back(zero); // extent
-                boundValues.push_back(zero); // byteStride
-                boundValues.push_back(zero); // baseLb
-              }
-              builder.create<fir::ResultOp>(loc, boundValues);
-            })
-            .getResults();
-    // Create the bound operations outside the if-then-else with the if op
-    // results.
-    for (unsigned i = 0; i < ifRes.size(); i += nbValuesPerBound) {
-      mlir::Value bound = builder.create<BoundsOp>(
-          loc, boundTy, ifRes[i], ifRes[i + 1], ifRes[i + 2], ifRes[i + 3],
-          true, ifRes[i + 4]);
-      bounds.push_back(bound);
-    }
-  } else {
-    mlir::Value box = !fir::isBoxAddress(info.addr.getType())
-                          ? info.addr
-                          : builder.create<fir::LoadOp>(loc, info.addr);
-    bounds = gatherBoundsOrBoundValues<BoundsOp, BoundsType>(builder, loc,
-                                                             dataExv, box);
-  }
-  return bounds;
-}
-
-/// Generate bounds operation for base array without any subscripts
-/// provided.
-template <typename BoundsOp, typename BoundsType>
-llvm::SmallVector<mlir::Value>
-genBaseBoundsOps(fir::FirOpBuilder &builder, mlir::Location loc,
-                 fir::ExtendedValue dataExv, bool isAssumedSize) {
-  mlir::Type idxTy = builder.getIndexType();
-  mlir::Type boundTy = builder.getType<BoundsType>();
-  llvm::SmallVector<mlir::Value> bounds;
-
-  if (dataExv.rank() == 0)
-    return bounds;
-
-  mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
-  const unsigned rank = dataExv.rank();
-  for (unsigned dim = 0; dim < rank; ++dim) {
-    mlir::Value baseLb =
-        fir::factory::readLowerBound(builder, loc, dataExv, dim, one);
-    mlir::Value zero = builder.createIntegerConstant(loc, idxTy, 0);
-    mlir::Value ub;
-    mlir::Value lb = zero;
-    mlir::Value ext = fir::factory::readExtent(builder, loc, dataExv, dim);
-    if (isAssumedSize && dim + 1 == rank) {
-      ext = zero;
-      ub = lb;
-    } else {
-      // ub = extent - 1
-      ub = builder.create<mlir::arith::SubIOp>(loc, ext, one);
-    }
-
-    mlir::Value bound =
-        builder.create<BoundsOp>(loc, boundTy, lb, ub, ext, one, false, baseLb);
-    bounds.push_back(bound);
-  }
-  return bounds;
+  return fir::factory::getDataOperandBaseAddr(
+      builder, converter.getSymbolAddress(sym),
+      Fortran::semantics::IsOptional(sym), loc);
 }
 
 namespace detail {
@@ -878,7 +668,7 @@ genBoundsOps(fir::FirOpBuilder &builder, mlir::Location loc,
              Fortran::lower::StatementContext &stmtCtx,
              const std::vector<Fortran::evaluate::Subscript> &subscripts,
              std::stringstream &asFortran, fir::ExtendedValue &dataExv,
-             bool dataExvIsAssumedSize, AddrAndBoundsInfo &info,
+             bool dataExvIsAssumedSize, fir::factory::AddrAndBoundsInfo &info,
              bool treatIndexAsSection = false) {
   int dimension = 0;
   mlir::Type idxTy = builder.getIndexType();
@@ -1083,7 +873,7 @@ std::optional<Ref> getRef(Expr &&expr) {
 } // namespace detail
 
 template <typename BoundsOp, typename BoundsType>
-AddrAndBoundsInfo gatherDataOperandAddrAndBounds(
+fir::factory::AddrAndBoundsInfo gatherDataOperandAddrAndBounds(
     Fortran::lower::AbstractConverter &converter, fir::FirOpBuilder &builder,
     semantics::SemanticsContext &semaCtx,
     Fortran::lower::StatementContext &stmtCtx,
@@ -1093,7 +883,7 @@ AddrAndBoundsInfo gatherDataOperandAddrAndBounds(
     llvm::SmallVector<mlir::Value> &bounds, bool treatIndexAsSection = false) {
   using namespace Fortran;
 
-  AddrAndBoundsInfo info;
+  fir::factory::AddrAndBoundsInfo info;
 
   if (!maybeDesignator) {
     info = getDataOperandBaseAddr(converter, builder, symbol, operandLocation);
@@ -1158,9 +948,9 @@ AddrAndBoundsInfo gatherDataOperandAddrAndBounds(
     info.addr = fir::getBase(compExv);
     info.rawInput = info.addr;
     if (mlir::isa<fir::SequenceType>(fir::unwrapRefType(info.addr.getType())))
-      bounds = genBaseBoundsOps<BoundsOp, BoundsType>(builder, operandLocation,
-                                                      compExv,
-                                                      /*isAssumedSize=*/false);
+      bounds = fir::factory::genBaseBoundsOps<BoundsOp, BoundsType>(
+          builder, operandLocation, compExv,
+          /*isAssumedSize=*/false);
     asFortran << designator.AsFortran();
 
     if (semantics::IsOptional(compRef->GetLastSymbol())) {
@@ -1187,7 +977,7 @@ AddrAndBoundsInfo gatherDataOperandAddrAndBounds(
       info.addr = boxAddrOp.getVal();
       info.boxType = info.addr.getType();
       info.rawInput = info.addr;
-      bounds = genBoundsOpsFromBox<BoundsOp, BoundsType>(
+      bounds = fir::factory::genBoundsOpsFromBox<BoundsOp, BoundsType>(
           builder, operandLocation, compExv, info);
     }
   } else {
@@ -1205,13 +995,13 @@ AddrAndBoundsInfo gatherDataOperandAddrAndBounds(
       if (mlir::isa<fir::BaseBoxType>(
               fir::unwrapRefType(info.addr.getType()))) {
         info.boxType = fir::unwrapRefType(info.addr.getType());
-        bounds = genBoundsOpsFromBox<BoundsOp, BoundsType>(
+        bounds = fir::factory::genBoundsOpsFromBox<BoundsOp, BoundsType>(
             builder, operandLocation, dataExv, info);
       }
       bool dataExvIsAssumedSize =
           Fortran::semantics::IsAssumedSizeArray(symRef->get().GetUltimate());
       if (mlir::isa<fir::SequenceType>(fir::unwrapRefType(info.addr.getType())))
-        bounds = genBaseBoundsOps<BoundsOp, BoundsType>(
+        bounds = fir::factory::genBaseBoundsOps<BoundsOp, BoundsType>(
             builder, operandLocation, dataExv, dataExvIsAssumedSize);
       asFortran << symRef->get().name().ToString();
     } else { // Unsupported
@@ -1222,24 +1012,6 @@ AddrAndBoundsInfo gatherDataOperandAddrAndBounds(
   return info;
 }
 
-template <typename BoundsOp, typename BoundsType>
-llvm::SmallVector<mlir::Value>
-genImplicitBoundsOps(fir::FirOpBuilder &builder, lower::AddrAndBoundsInfo &info,
-                     fir::ExtendedValue dataExv, bool dataExvIsAssumedSize,
-                     mlir::Location loc) {
-  llvm::SmallVector<mlir::Value> bounds;
-
-  mlir::Value baseOp = info.rawInput;
-  if (mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(baseOp.getType())))
-    bounds = lower::genBoundsOpsFromBox<BoundsOp, BoundsType>(builder, loc,
-                                                              dataExv, info);
-  if (mlir::isa<fir::SequenceType>(fir::unwrapRefType(baseOp.getType()))) {
-    bounds = lower::genBaseBoundsOps<BoundsOp, BoundsType>(
-        builder, loc, dataExv, dataExvIsAssumedSize);
-  }
-
-  return bounds;
-}
 } // namespace lower
 } // namespace Fortran
 
diff --git a/flang/include/flang/Optimizer/Builder/DirectivesCommon.h b/flang/include/flang/Optimizer/Builder/DirectivesCommon.h
new file mode 100644
index 0000000000000..443b0ee59007f
--- /dev/null
+++ b/flang/include/flang/Optimizer/Builder/DirectivesCommon.h
@@ -0,0 +1,256 @@
+//===-- DirectivesCommon.h --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
+///
+/// A location to place directive utilities shared across multiple lowering
+/// and optimizer files, e.g. utilities shared in OpenMP and OpenACC.
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_BUILDER_DIRECTIVESCOMMON_H_
+#define FORTRAN_OPTIMIZER_BUILDER_DIRECTIVESCOMMON_H_
+
+#include "flang/Optimizer/Builder/BoxValue.h"
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/Todo.h"
+#include "flang/Optimizer/HLFIR/HLFIROps.h"
+#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+
+namespace fir::factory {
+
+/// Information gathered to generate bounds operation and data entry/exit
+/// operations.
+struct AddrAndBoundsInfo {
+  explicit AddrAndBoundsInfo() {}
+  explicit AddrAndBoundsInfo(mlir::Value addr, mlir::Value rawInput)
+      : addr(addr), rawInput(rawInput) {}
+  explicit AddrAndBoundsInfo(mlir::Value addr, mlir::Value rawInput,
+                             mlir::Value isPresent)
+      : addr(addr), rawInput(rawInput), isPresent(isPresent) {}
+  explicit AddrAndBoundsInfo(mlir::Value addr, mlir::Value rawInput,
+                             mlir::Value isPresent, mlir::Type boxType)
+      : addr(addr), rawInput(rawInput), isPresent(isPresent), boxType(boxType) {
+  }
+  mlir::Value addr = nullptr;
+  mlir::Value rawInput = nullptr;
+  mlir::Value isPresent = nullptr;
+  mlir::Type boxType = nullptr;
+  void dump(llvm::raw_ostream &os) {
+    os << "AddrAndBoundsInfo addr: " << addr << "\n";
+    os << "AddrAndBoundsInfo rawInput: " << rawInput << "\n";
+    os << "AddrAndBoundsInfo isPresent: " << isPresent << "\n";
+    os << "AddrAndBoundsInfo boxType: " << boxType << "\n";
+  }
+};
+
+inline AddrAndBoundsInfo getDataOperandBaseAddr(fir::FirOpBuilder &builder,
+                                                mlir::Value symAddr,
+                                                bool isOptional,
+                                                mlir::Location loc) {
+  mlir::Value rawInput = symAddr;
+  if (auto declareOp =
+          mlir::dyn_cast_or_null<hlfir::DeclareOp>(symAddr.getDefiningOp())) {
+    symAddr = declareOp.getResults()[0];
+    rawInput = declareOp.getResults()[1];
+  }
+
+  if (!symAddr)
+    llvm::report_fatal_error("could not retrieve symbol address");
+
+  mlir::Value isPresent;
+  if (isOptional)
+    isPresent =
+        builder.create<fir::IsPresentOp>(loc, builder.getI1Type(), rawInput);
+
+  if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(
+          fir::unwrapRefType(symAddr.getType()))) {
+    if (mlir::isa<fir::RecordType>(boxTy.getEleTy()))
+      TODO(loc, "derived type");
+
+    // In case of a box reference, load it here to get the box value.
+    // This is preferable because then the same box value can then be used for
+    // all address/dimension retrievals. For Fortran optional though, leave
+    // the load generation for later so it can be done in the appropriate
+    // if branches.
+    if (mlir::isa<fir::ReferenceType>(symAddr.getType()) && !isOptional) {
+      mlir::Value addr = builder.create<fir::LoadOp>(loc, symAddr);
+      return AddrAndBoundsInfo(addr, rawInput, isPresent, boxTy);
+    }
+
+    return AddrAndBoundsInfo(symAddr, rawInput, isPresent, boxTy);
+  }
+  return AddrAndBoundsInfo(symAddr, rawInput, isPresent);
+}
+
+template <typename BoundsOp, typename BoundsType>
+llvm::SmallVector<mlir::Value>
+gatherBoundsOrBoundValues(fir::FirOpBuilder &builder, mlir::Location loc,
+                          fir::ExtendedValue dataExv, mlir::Value box,
+                          bool collectValuesOnly = false) {
+  assert(box && "box must exist"); // Every dimension is queried from `box`.
+  llvm::SmallVector<mlir::Value> values; // A BoundsOp or five values per dim.
+  mlir::Value byteStride; // Carried over from one dimension to the next.
+  mlir::Type idxTy = builder.getIndexType();
+  mlir::Type boundTy = builder.getType<BoundsType>();
+  mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
+  for (unsigned dim = 0; dim < dataExv.rank(); ++dim) {
+    mlir::Value d = builder.createIntegerConstant(loc, idxTy, dim);
+    mlir::Value baseLb =
+        fir::factory::readLowerBound(builder, loc, dataExv, dim, one);
+    auto dimInfo =
+        builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, box, d);
+    mlir::Value lb = builder.createIntegerConstant(loc, idxTy, 0); // 0-based.
+    mlir::Value ub = // extent - 1
+        builder.create<mlir::arith::SubIOp>(loc, dimInfo.getExtent(), one);
+    if (dim == 0) // First stride is the element size.
+      byteStride = dimInfo.getByteStride();
+    if (collectValuesOnly) { // Raw values, five per dim, in this order.
+      values.push_back(lb);
+      values.push_back(ub);
+      values.push_back(dimInfo.getExtent());
+      values.push_back(byteStride);
+      values.push_back(baseLb);
+    } else { // One BoundsOp result per dimension.
+      mlir::Value bound = builder.create<BoundsOp>(
+          loc, boundTy, lb, ub, dimInfo.getExtent(), byteStride, true, baseLb);
+      values.push_back(bound);
+    }
+    // Next dimension's stride: current stride times current extent.
+    byteStride = builder.create<mlir::arith::MulIOp>(loc, byteStride,
+                                                     dimInfo.getExtent());
+  }
+  return values;
+}
+
+/// Generate one bounds op per dimension of \p dataExv from the box in \p info.
+template <typename BoundsOp, typename BoundsType>
+llvm::SmallVector<mlir::Value>
+genBoundsOpsFromBox(fir::FirOpBuilder &builder, mlir::Location loc,
+                    fir::ExtendedValue dataExv, AddrAndBoundsInfo &info) {
+  llvm::SmallVector<mlir::Value> bounds;
+  mlir::Type idxTy = builder.getIndexType();
+  mlir::Type boundTy = builder.getType<BoundsType>();
+
+  assert(mlir::isa<fir::BaseBoxType>(info.boxType) &&
+         "expect fir.box or fir.class");
+  assert(fir::unwrapRefType(info.addr.getType()) == info.boxType &&
+         "expected box type consistency");
+
+  if (info.isPresent) { // Presence flag given: guard the box reads with it.
+    llvm::SmallVector<mlir::Type> resTypes;
+    constexpr unsigned nbValuesPerBound = 5; // lb, ub, extent, stride, baseLb
+    for (unsigned dim = 0; dim < dataExv.rank() * nbValuesPerBound; ++dim)
+      resTypes.push_back(idxTy); // One index-typed result per bound value.
+
+    mlir::Operation::result_range ifRes =
+        builder.genIfOp(loc, resTypes, info.isPresent, /*withElseRegion=*/true)
+            .genThen([&]() {
+              mlir::Value box = // Load when addr is a reference to a box.
+                  !fir::isBoxAddress(info.addr.getType())
+                      ? info.addr
+                      : builder.create<fir::LoadOp>(loc, info.addr);
+              llvm::SmallVector<mlir::Value> boundValues =
+                  gatherBoundsOrBoundValues<BoundsOp, BoundsType>(
+                      builder, loc, dataExv, box,
+                      /*collectValuesOnly=*/true);
+              builder.create<fir::ResultOp>(loc, boundValues);
+            })
+            .genElse([&] {
+              // Box is not present. Populate bound values with default values.
+              llvm::SmallVector<mlir::Value> boundValues;
+              mlir::Value zero = builder.createIntegerConstant(loc, idxTy, 0);
+              mlir::Value mOne = builder.createMinusOneInteger(loc, idxTy);
+              for (unsigned dim = 0; dim < dataExv.rank(); ++dim) {
+                boundValues.push_back(zero); // lb
+                boundValues.push_back(mOne); // ub
+                boundValues.push_back(zero); // extent
+                boundValues.push_back(zero); // byteStride
+                boundValues.push_back(zero); // baseLb
+              }
+              builder.create<fir::ResultOp>(loc, boundValues);
+            })
+            .getResults();
+    // Create the bound operations outside the if-then-else with the if op
+    // results.
+    for (unsigned i = 0; i < ifRes.size(); i += nbValuesPerBound) {
+      mlir::Value bound = builder.create<BoundsOp>(
+          loc, boundTy, ifRes[i], ifRes[i + 1], ifRes[i + 2], ifRes[i + 3],
+          true, ifRes[i + 4]);
+      bounds.push_back(bound);
+    }
+  } else { // No presence flag: read the box unconditionally.
+    mlir::Value box = !fir::isBoxAddress(info.addr.getType())
+                          ? info.addr
+                          : builder.create<fir::LoadOp>(loc, info.addr);
+    bounds = gatherBoundsOrBoundValues<BoundsOp, BoundsType>(builder, loc,
+                                                             dataExv, box);
+  }
+  return bounds;
+}
+
+/// Generate one bounds op per dimension for a base array referenced without
+/// any subscripts: extents are read from \p dataExv and the stride is one.
+template <typename BoundsOp, typename BoundsType>
+llvm::SmallVector<mlir::Value>
+genBaseBoundsOps(fir::FirOpBuilder &builder, mlir::Location loc,
+                 fir::ExtendedValue dataExv, bool isAssumedSize) {
+  mlir::Type idxTy = builder.getIndexType();
+  mlir::Type boundTy = builder.getType<BoundsType>();
+  llvm::SmallVector<mlir::Value> bounds;
+
+  if (dataExv.rank() == 0)
+    return bounds; // Rank 0: nothing to generate.
+
+  mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
+  const unsigned rank = dataExv.rank();
+  for (unsigned dim = 0; dim < rank; ++dim) {
+    mlir::Value baseLb =
+        fir::factory::readLowerBound(builder, loc, dataExv, dim, one);
+    mlir::Value zero = builder.createIntegerConstant(loc, idxTy, 0);
+    mlir::Value ub;
+    mlir::Value lb = zero; // 0-based.
+    mlir::Value ext = fir::factory::readExtent(builder, loc, dataExv, dim);
+    if (isAssumedSize && dim + 1 == rank) {
+      ext = zero; // Last dimension of an assumed-size array: extent set to 0.
+      ub = lb;
+    } else {
+      // ub = extent - 1
+      ub = builder.create<mlir::arith::SubIOp>(loc, ext, one);
+    }
+
+    mlir::Value bound =
+        builder.create<BoundsOp>(loc, boundTy, lb, ub, ext, one, false, baseLb);
+    bounds.push_back(bound);
+  }
+  return bounds;
+}
+
+template <typename BoundsOp, typename BoundsType>
+llvm::SmallVector<mlir::Value>
+genImplicitBoundsOps(fir::FirOpBuilder &builder, AddrAndBoundsInfo &info,
+                     fir::ExtendedValue dataExv, bool dataExvIsAssumedSize,
+                     mlir::Location loc) {
+  llvm::SmallVector<mlir::Value> bounds; // Left empty if no case matches.
+
+  mlir::Value baseOp = info.rawInput; // Dispatch on the raw input's type.
+  if (mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(baseOp.getType())))
+    bounds = // Boxed input: bounds come from the descriptor.
+        genBoundsOpsFromBox<BoundsOp, BoundsType>(builder, loc, dataExv, info);
+  if (mlir::isa<fir::SequenceType>(fir::unwrapRefType(baseOp.getType()))) {
+    bounds = genBaseBoundsOps<BoundsOp, BoundsType>(builder, loc, dataExv,
+                                                    dataExvIsAssumedSize);
+  }
+
+  return bounds;
+}
+
+} // namespace fir::factory
+#endif // FORTRAN_OPTIMIZER_BUILDER_DIRECTIVESCOMMON_H_
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 86d8a549331fb..456c30264d068 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -352,7 +352,7 @@ genDataOperandOperations(const Fortran::parser::AccObjectList &objectList,
     Fortran::semantics::Symbol &symbol = getSymbolFromAccObject(accObject);
     Fortran::semantics::MaybeExpr designator = Fortran::common::visit(
         [&](auto &&s) { return ea.Analyze(s); }, accObject.u);
-    Fortran::lower::AddrAndBoundsInfo info =
+    fir::factory::AddrAndBoundsInfo info =
         Fortran::lower::gatherDataOperandAddrAndBounds<
             mlir::acc::DataBoundsOp, mlir::acc::DataBoundsType>(
             converter, builder, semanticsContext, stmtCtx, symbol, designator,
@@ -392,7 +392,7 @@ static void genDeclareDataOperandOperations(
     Fortran::semantics::Symbol &symbol = getSymbolFromAccObject(accObject);
     Fortran::semantics::MaybeExpr designator = Fortran::common::visit(
         [&](auto &&s) { return ea.Analyze(s); }, accObject.u);
-    Fortran::lower::AddrAndBoundsInfo info =
+    fir::factory::AddrAndBoundsInfo info =
         Fortran::lower::gatherDataOperandAddrAndBounds<
             mlir::acc::DataBoundsOp, mlir::acc::DataBoundsType>(
             converter, builder, semanticsContext, stmtCtx, symbol, designator,
@@ -855,7 +855,7 @@ genPrivatizations(const Fortran::parser::AccObjectList &objectList,
     Fortran::semantics::Symbol &symbol = getSymbolFromAccObject(accObject);
     Fortran::semantics::MaybeExpr designator = Fortran::common::visit(
         [&](auto &&s) { return ea.Analyze(s); }, accObject.u);
-    Fortran::lower::AddrAndBoundsInfo info =
+    fir::factory::AddrAndBoundsInfo info =
         Fortran::lower::gatherDataOperandAddrAndBounds<
             mlir::acc::DataBoundsOp, mlir::acc::DataBoundsType>(
             converter, builder, semanticsContext, stmtCtx, symbol, designator,
@@ -1436,7 +1436,7 @@ genReductions(const Fortran::parser::AccObjectListWithReduction &objectList,
     Fortran::semantics::Symbol &symbol = getSymbolFromAccObject(accObject);
     Fortran::semantics::MaybeExpr designator = Fortran::common::visit(
         [&](auto &&s) { return ea.Analyze(s); }, accObject.u);
-    Fortran::lower::AddrAndBoundsInfo info =
+    fir::factory::AddrAndBoundsInfo info =
         Fortran::lower::gatherDataOperandAddrAndBounds<
             mlir::acc::DataBoundsOp, mlir::acc::DataBoundsType>(
             converter, builder, semanticsContext, stmtCtx, symbol, designator,
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index fb8e007c7af57..299d9d438f115 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -949,7 +949,7 @@ void ClauseProcessor::processMapObjects(
     std::stringstream asFortran;
     std::optional<omp::Object> parentObj;
 
-    lower::AddrAndBoundsInfo info =
+    fir::factory::AddrAndBoundsInfo info =
         lower::gatherDataOperandAddrAndBounds<mlir::omp::MapBoundsOp,
                                               mlir::omp::MapBoundsType>(
             converter, firOpBuilder, semaCtx, stmtCtx, *object.sym(),
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 39b4de919c8ba..1434bcd6330e0 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1346,11 +1346,12 @@ static void genBodyOfTargetOp(
             firOpBuilder.createTemporary(val.getLoc(), val.getType());
         firOpBuilder.createStoreWithConvert(copyVal.getLoc(), val, copyVal);
 
-        lower::AddrAndBoundsInfo info = lower::getDataOperandBaseAddr(
-            firOpBuilder, val, /*isOptional=*/false, val.getLoc());
+        fir::factory::AddrAndBoundsInfo info =
+            fir::factory::getDataOperandBaseAddr(
+                firOpBuilder, val, /*isOptional=*/false, val.getLoc());
         llvm::SmallVector<mlir::Value> bounds =
-            Fortran::lower::genImplicitBoundsOps<mlir::omp::MapBoundsOp,
-                                                 mlir::omp::MapBoundsType>(
+            fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp,
+                                               mlir::omp::MapBoundsType>(
                 firOpBuilder, info,
                 hlfir::translateToExtendedValue(val.getLoc(), firOpBuilder,
                                                 hlfir::Entity{val})
@@ -2188,11 +2189,12 @@ genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
       fir::ExtendedValue dataExv = converter.getSymbolExtendedValue(sym);
       name << sym.name().ToString();
 
-      lower::AddrAndBoundsInfo info = getDataOperandBaseAddr(
-          converter, firOpBuilder, sym, converter.getCurrentLocation());
+      fir::factory::AddrAndBoundsInfo info =
+          Fortran::lower::getDataOperandBaseAddr(
+              converter, firOpBuilder, sym, converter.getCurrentLocation());
       llvm::SmallVector<mlir::Value> bounds =
-          lower::genImplicitBoundsOps<mlir::omp::MapBoundsOp,
-                                      mlir::omp::MapBoundsType>(
+          fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp,
+                                             mlir::omp::MapBoundsType>(
               firOpBuilder, info, dataExv,
               semantics::IsAssumedSizeArray(sym.GetUltimate()),
               converter.getCurrentLocation());
@@ -2511,7 +2513,7 @@ static void genStandaloneDo(lower::AbstractConverter &converter,
 
   DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
                            /*shouldCollectPreDeterminedSymbols=*/true,
-                           enableDelayedPrivatization, symTable);
+                           enableDelayedPrivatizationStaging, symTable);
   dsp.processStep1(&wsloopClauseOps);
 
   mlir::omp::LoopNestOperands loopNestClauseOps;
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index 9971dc8e0b001..35722fa7d1b12 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -310,8 +310,9 @@ mlir::Value createParentSymAndGenIntermediateMaps(
   };
 
   // Generate the access to the original parent base address.
-  lower::AddrAndBoundsInfo parentBaseAddr = lower::getDataOperandBaseAddr(
-      converter, firOpBuilder, *objectList[0].sym(), clauseLocation);
+  fir::factory::AddrAndBoundsInfo parentBaseAddr =
+      lower::getDataOperandBaseAddr(converter, firOpBuilder,
+                                    *objectList[0].sym(), clauseLocation);
   mlir::Value curValue = parentBaseAddr.addr;
 
   // Iterate over all objects in the objectList, this should consist of all
@@ -560,7 +561,7 @@ void insertChildMapInfoIntoParent(
       // Create parent to emplace and bind members
       llvm::SmallVector<mlir::Value> bounds;
       std::stringstream asFortran;
-      lower::AddrAndBoundsInfo info =
+      fir::factory::AddrAndBoundsInfo info =
           lower::gatherDataOperandAddrAndBounds<mlir::omp::MapBoundsOp,
                                                 mlir::omp::MapBoundsType>(
               converter, firOpBuilder, semaCtx, converter.getFctCtx(),
diff --git a/flang/lib/Optimizer/CodeGen/CMakeLists.txt b/flang/lib/Optimizer/CodeGen/CMakeLists.txt
index 81c8a68b95367..553c20bb85d38 100644
--- a/flang/lib/Optimizer/CodeGen/CMakeLists.txt
+++ b/flang/lib/Optimizer/CodeGen/CMakeLists.txt
@@ -11,11 +11,13 @@ add_flang_library(FIRCodeGen
   TypeConverter.cpp
 
   DEPENDS
+  CUFAttrs
   FIRDialect
   FIROptCodeGenPassIncGen
   CGOpsIncGen
 
   LINK_LIBS
+  CUFAttrs
   FIRAnalysis
   FIRBuilder
   FIRDialect
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 5ba93fefab3f9..43c0e2686a8c3 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -1725,15 +1725,35 @@ struct EmboxOpConversion : public EmboxCommonConversion<fir::EmboxOp> {
   }
 };
 
-static bool isDeviceAllocation(mlir::Value val) {
+static bool isDeviceAllocation(mlir::Value val, mlir::Value adaptorVal) {
   if (auto loadOp = mlir::dyn_cast_or_null<fir::LoadOp>(val.getDefiningOp()))
-    return isDeviceAllocation(loadOp.getMemref());
+    return isDeviceAllocation(loadOp.getMemref(), {});
   if (auto boxAddrOp =
           mlir::dyn_cast_or_null<fir::BoxAddrOp>(val.getDefiningOp()))
-    return isDeviceAllocation(boxAddrOp.getVal());
+    return isDeviceAllocation(boxAddrOp.getVal(), {});
   if (auto convertOp =
           mlir::dyn_cast_or_null<fir::ConvertOp>(val.getDefiningOp()))
-    return isDeviceAllocation(convertOp.getValue());
+    return isDeviceAllocation(convertOp.getValue(), {});
+  if (!val.getDefiningOp() && adaptorVal) {
+    if (auto blockArg = llvm::cast<mlir::BlockArgument>(adaptorVal)) {
+      if (blockArg.getOwner() && blockArg.getOwner()->getParentOp() &&
+          blockArg.getOwner()->isEntryBlock()) {
+        if (auto func = mlir::dyn_cast_or_null<mlir::FunctionOpInterface>(
+                *blockArg.getOwner()->getParentOp())) {
+          auto argAttrs = func.getArgAttrs(blockArg.getArgNumber());
+          for (auto attr : argAttrs) {
+            if (attr.getName().getValue().ends_with(cuf::getDataAttrName())) {
+              auto dataAttr =
+                  mlir::dyn_cast<cuf::DataAttributeAttr>(attr.getValue());
+              if (dataAttr.getValue() != cuf::DataAttribute::Pinned &&
+                  dataAttr.getValue() != cuf::DataAttribute::Unified)
+                return true;
+            }
+          }
+        }
+      }
+    }
+  }
   if (auto callOp = mlir::dyn_cast_or_null<fir::CallOp>(val.getDefiningOp()))
     if (callOp.getCallee() &&
         (callOp.getCallee().value().getRootReference().getValue().starts_with(
@@ -1928,7 +1948,8 @@ struct XEmboxOpConversion : public EmboxCommonConversion<fir::cg::XEmboxOp> {
     if (fir::isDerivedTypeWithLenParams(boxTy))
       TODO(loc, "fir.embox codegen of derived with length parameters");
     mlir::Value result = placeInMemoryIfNotGlobalInit(
-        rewriter, loc, boxTy, dest, isDeviceAllocation(xbox.getMemref()));
+        rewriter, loc, boxTy, dest,
+        isDeviceAllocation(xbox.getMemref(), adaptor.getMemref()));
     rewriter.replaceOp(xbox, result);
     return mlir::success();
   }
@@ -2052,9 +2073,9 @@ struct XReboxOpConversion : public EmboxCommonConversion<fir::cg::XReboxOp> {
       dest = insertStride(rewriter, loc, dest, dim, std::get<1>(iter.value()));
     }
     dest = insertBaseAddress(rewriter, loc, dest, base);
-    mlir::Value result =
-        placeInMemoryIfNotGlobalInit(rewriter, rebox.getLoc(), destBoxTy, dest,
-                                     isDeviceAllocation(rebox.getBox()));
+    mlir::Value result = placeInMemoryIfNotGlobalInit(
+        rewriter, rebox.getLoc(), destBoxTy, dest,
+        isDeviceAllocation(rebox.getBox(), rebox.getBox()));
     rewriter.replaceOp(rebox, result);
     return mlir::success();
   }
diff --git a/flang/lib/Optimizer/OpenACC/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/FIROpenACCTypeInterfaces.cpp
index e6a484923c706..94ab31de1763d 100644
--- a/flang/lib/Optimizer/OpenACC/FIROpenACCTypeInterfaces.cpp
+++ b/flang/lib/Optimizer/OpenACC/FIROpenACCTypeInterfaces.cpp
@@ -11,8 +11,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "flang/Optimizer/OpenACC/FIROpenACCTypeInterfaces.h"
-#include "flang/Lower/DirectivesCommon.h"
 #include "flang/Optimizer/Builder/BoxValue.h"
+#include "flang/Optimizer/Builder/DirectivesCommon.h"
 #include "flang/Optimizer/Builder/FIRBuilder.h"
 #include "flang/Optimizer/Builder/HLFIRTools.h"
 #include "flang/Optimizer/Dialect/FIROps.h"
@@ -180,10 +180,10 @@ OpenACCMappableModel<fir::SequenceType>::generateAccBounds(
       }
       // TODO: Handle Fortran optional.
       const mlir::Value isPresent;
-      Fortran::lower::AddrAndBoundsInfo info(box, boxRef, isPresent,
-                                             box.getType());
-      return Fortran::lower::genBoundsOpsFromBox<mlir::acc::DataBoundsOp,
-                                                 mlir::acc::DataBoundsType>(
+      fir::factory::AddrAndBoundsInfo info(box, boxRef, isPresent,
+                                           box.getType());
+      return fir::factory::genBoundsOpsFromBox<mlir::acc::DataBoundsOp,
+                                               mlir::acc::DataBoundsType>(
           firBuilder, loc, exv, info);
     }
     assert(false && "array with unknown dimension expected to have descriptor");
@@ -200,8 +200,8 @@ OpenACCMappableModel<fir::SequenceType>::generateAccBounds(
   auto res = hlfir::translateToExtendedValue(loc, firBuilder,
                                              hlfir::Entity(valToCheck));
   fir::ExtendedValue exv = res.first;
-  return Fortran::lower::genBaseBoundsOps<mlir::acc::DataBoundsOp,
-                                          mlir::acc::DataBoundsType>(
+  return fir::factory::genBaseBoundsOps<mlir::acc::DataBoundsOp,
+                                        mlir::acc::DataBoundsType>(
       firBuilder, loc, exv,
       /*isAssumedSize=*/isAssumedSize);
 }
diff --git a/flang/lib/Optimizer/OpenMP/CMakeLists.txt b/flang/lib/Optimizer/OpenMP/CMakeLists.txt
index 9fe2d3947c26d..c8509ccc7fea5 100644
--- a/flang/lib/Optimizer/OpenMP/CMakeLists.txt
+++ b/flang/lib/Optimizer/OpenMP/CMakeLists.txt
@@ -22,7 +22,6 @@ add_flang_library(FlangOpenMPTransforms
   FIRDialectSupport
   FIRSupport
   FortranCommon
-  FortranEvaluate
   HLFIRDialect
 
   MLIR_LIBS
diff --git a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
index c3c1f3b2848b8..555601c5e92df 100644
--- a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
+++ b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
@@ -30,19 +30,39 @@ class GenericLoopConversionPattern
     : public mlir::OpConversionPattern<mlir::omp::LoopOp> {
 public:
   enum class GenericLoopCombinedInfo {
-    None,
+    Standalone,
     TargetTeamsLoop,
     TargetParallelLoop
   };
 
   using mlir::OpConversionPattern<mlir::omp::LoopOp>::OpConversionPattern;
 
+  explicit GenericLoopConversionPattern(mlir::MLIRContext *ctx)
+      : mlir::OpConversionPattern<mlir::omp::LoopOp>{ctx} {
+    // Mark the pattern as having bounded rewrite recursion to make sure that
+    // nested `loop` directives are handled as well.
+    this->setHasBoundedRewriteRecursion(true);
+  }
+
   mlir::LogicalResult
   matchAndRewrite(mlir::omp::LoopOp loopOp, OpAdaptor adaptor,
                   mlir::ConversionPatternRewriter &rewriter) const override {
     assert(mlir::succeeded(checkLoopConversionSupportStatus(loopOp)));
 
-    rewriteToDistributeParallelDo(loopOp, rewriter);
+    GenericLoopCombinedInfo combinedInfo = findGenericLoopCombineInfo(loopOp);
+
+    switch (combinedInfo) { // Pick the rewrite based on the enclosing ops.
+    case GenericLoopCombinedInfo::Standalone:
+      rewriteToSimdLoop(loopOp, rewriter);
+      break;
+    case GenericLoopCombinedInfo::TargetParallelLoop:
+      llvm_unreachable("not yet implemented: `parallel loop` directive");
+      break; // Not reached: the assert above is expected to reject this case.
+    case GenericLoopCombinedInfo::TargetTeamsLoop:
+      rewriteToDistributeParallelDo(loopOp, rewriter);
+      break;
+    }
+
     rewriter.eraseOp(loopOp);
     return mlir::success();
   }
@@ -52,9 +72,8 @@ class GenericLoopConversionPattern
     GenericLoopCombinedInfo combinedInfo = findGenericLoopCombineInfo(loopOp);
 
     switch (combinedInfo) {
-    case GenericLoopCombinedInfo::None:
-      return loopOp.emitError(
-          "not yet implemented: Standalone `omp loop` directive");
+    case GenericLoopCombinedInfo::Standalone:
+      break;
     case GenericLoopCombinedInfo::TargetParallelLoop:
       return loopOp.emitError(
           "not yet implemented: Combined `omp target parallel loop` directive");
@@ -86,7 +105,7 @@ class GenericLoopConversionPattern
   static GenericLoopCombinedInfo
   findGenericLoopCombineInfo(mlir::omp::LoopOp loopOp) {
     mlir::Operation *parentOp = loopOp->getParentOp();
-    GenericLoopCombinedInfo result = GenericLoopCombinedInfo::None;
+    GenericLoopCombinedInfo result = GenericLoopCombinedInfo::Standalone;
 
     if (auto teamsOp = mlir::dyn_cast_if_present<mlir::omp::TeamsOp>(parentOp))
       if (mlir::isa_and_present<mlir::omp::TargetOp>(teamsOp->getParentOp()))
@@ -100,6 +119,62 @@ class GenericLoopConversionPattern
     return result;
   }
 
+  /// Rewrites standalone `loop` directives to equivalent `simd` constructs.
+  /// The reasoning behind this decision is that according to the spec (version
+  /// 5.2, section 11.7.1):
+  ///
+  /// "If the bind clause is not specified on a construct for which it may be
+  /// specified and the construct is closely nested inside a teams or parallel
+  /// construct, the effect is as if binding is teams or parallel. If none of
+  /// those conditions hold, the binding region is not defined."
+  ///
+  /// which means that standalone `loop` directives have undefined binding
+  /// region. Moreover, the spec says (in the next paragraph):
+  ///
+  /// "The specified binding region determines the binding thread set.
+  /// Specifically, if the binding region is a teams region, then the binding
+  /// thread set is the set of initial threads that are executing that region
+  /// while if the binding region is a parallel region, then the binding thread
+  /// set is the team of threads that are executing that region. If the binding
+  /// region is not defined, then the binding thread set is the encountering
+  /// thread."
+  ///
+  /// which means that the binding thread set for a standalone `loop` directive
+  /// is only the encountering thread.
+  ///
+  /// Since the encountering thread is the binding thread (set) for a
+  /// standalone `loop` directive, the best we can do in such a case is to
+  /// "simd" the directive. Only the private clause operands are carried over.
+  void rewriteToSimdLoop(mlir::omp::LoopOp loopOp,
+                         mlir::ConversionPatternRewriter &rewriter) const {
+    loopOp.emitWarning("Detected standalone OpenMP `loop` directive, the "
+                       "associated loop will be rewritten to `simd`.");
+    mlir::omp::SimdOperands simdClauseOps;
+    simdClauseOps.privateVars = loopOp.getPrivateVars();
+
+    auto privateSyms = loopOp.getPrivateSyms();
+    if (privateSyms) // Copy the privatizer symbols only when present.
+      simdClauseOps.privateSyms.assign(privateSyms->begin(),
+                                       privateSyms->end());
+
+    Fortran::common::openmp::EntryBlockArgs simdArgs;
+    simdArgs.priv.vars = simdClauseOps.privateVars;
+
+    auto simdOp =
+        rewriter.create<mlir::omp::SimdOp>(loopOp.getLoc(), simdClauseOps);
+    mlir::Block *simdBlock =
+        genEntryBlock(rewriter, simdArgs, simdOp.getRegion());
+
+    mlir::IRMapping mapper; // Loop block arguments -> simd block arguments.
+    mlir::Block &loopBlock = *loopOp.getRegion().begin();
+
+    for (auto [loopOpArg, simdopArg] :
+         llvm::zip_equal(loopBlock.getArguments(), simdBlock->getArguments()))
+      mapper.map(loopOpArg, simdopArg);
+
+    rewriter.clone(*loopOp.begin(), mapper); // Clone with remapped args.
+  }
+
   void rewriteToDistributeParallelDo(
       mlir::omp::LoopOp loopOp,
       mlir::ConversionPatternRewriter &rewriter) const {
diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
index c63d2f4531a6f..98e325c307d97 100644
--- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
@@ -24,7 +24,7 @@
 /// indirectly via a parent object.
 //===----------------------------------------------------------------------===//
 
-#include "flang/Lower/DirectivesCommon.h"
+#include "flang/Optimizer/Builder/DirectivesCommon.h"
 #include "flang/Optimizer/Builder/FIRBuilder.h"
 #include "flang/Optimizer/Builder/HLFIRTools.h"
 #include "flang/Optimizer/Dialect/FIRType.h"
@@ -598,12 +598,12 @@ class MapInfoFinalizationPass
           auto fieldCoord = builder.create<fir::CoordinateOp>(
               op.getLoc(), builder.getRefType(memTy), op.getVarPtr(),
               fieldIdxVal);
-          Fortran::lower::AddrAndBoundsInfo info =
-              Fortran::lower::getDataOperandBaseAddr(
+          fir::factory::AddrAndBoundsInfo info =
+              fir::factory::getDataOperandBaseAddr(
                   builder, fieldCoord, /*isOptional=*/false, op.getLoc());
           llvm::SmallVector<mlir::Value> bounds =
-              Fortran::lower::genImplicitBoundsOps<mlir::omp::MapBoundsOp,
-                                                   mlir::omp::MapBoundsType>(
+              fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp,
+                                                 mlir::omp::MapBoundsType>(
                   builder, info,
                   hlfir::translateToExtendedValue(op.getLoc(), builder,
                                                   hlfir::Entity{fieldCoord})
diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index f3c2a5bf094d0..aee0357333159 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -736,6 +736,8 @@ class ScopeHandler : public ImplicitRulesVisitor {
     std::vector<const std::list<parser::EquivalenceObject> *> equivalenceSets;
     // Names of all common block objects in the scope
     std::set<SourceName> commonBlockObjects;
+    // Names of all objects listed in a declare target directive
+    std::set<SourceName> declareTargetNames;
     // Info about SAVE statements and attributes in current scope
     struct {
       std::optional<SourceName> saveAll; // "SAVE" without entity list
@@ -1223,6 +1225,7 @@ class DeclarationVisitor : public ArraySpecVisitor,
   const parser::Name *FindComponent(const parser::Name *, const parser::Name &);
   void Initialization(const parser::Name &, const parser::Initialization &,
       bool inComponentDecl);
+  bool FindAndMarkDeclareTargetSymbol(const parser::Name &);
   bool PassesLocalityChecks(
       const parser::Name &name, Symbol &symbol, Symbol::Flag flag);
   bool CheckForHostAssociatedImplicit(const parser::Name &);
@@ -1524,7 +1527,47 @@ class OmpVisitor : public virtual DeclarationVisitor {
     return true;
   }
   void Post(const parser::OpenMPThreadprivate &) { SkipImplicitTyping(false); }
-  bool Pre(const parser::OpenMPDeclareTargetConstruct &) {
+  bool Pre(const parser::OpenMPDeclareTargetConstruct &x) {
+    const auto &spec{std::get<parser::OmpDeclareTargetSpecifier>(x.t)};
+    auto populateDeclareTargetNames{ // Records the name of each object.
+        [this](const parser::OmpObjectList &objectList) {
+          for (const auto &ompObject : objectList.v) {
+            common::visit(
+                common::visitors{
+                    [&](const parser::Designator &designator) {
+                      if (const auto *name{
+                              semantics::getDesignatorNameIfDataRef(
+                                  designator)}) {
+                        specPartState_.declareTargetNames.insert(name->source);
+                      }
+                    },
+                    [&](const parser::Name &name) {
+                      specPartState_.declareTargetNames.insert(name.source);
+                    },
+                },
+                ompObject.u);
+          }
+        }};
+
+    if (const auto *objectList{parser::Unwrap<parser::OmpObjectList>(spec.u)}) {
+      populateDeclareTargetNames(*objectList);
+    } else if (const auto *clauseList{ // Only to, link and enter clauses.
+                   parser::Unwrap<parser::OmpClauseList>(spec.u)}) {
+      for (const auto &clause : clauseList->v) {
+        if (const auto *toClause{
+                std::get_if<parser::OmpClause::To>(&clause.u)}) {
+          populateDeclareTargetNames(
+              std::get<parser::OmpObjectList>(toClause->v.t));
+        } else if (const auto *linkClause{
+                       std::get_if<parser::OmpClause::Link>(&clause.u)}) {
+          populateDeclareTargetNames(linkClause->v);
+        } else if (const auto *enterClause{
+                       std::get_if<parser::OmpClause::Enter>(&clause.u)}) {
+          populateDeclareTargetNames(enterClause->v);
+        }
+      }
+    }
+
     SkipImplicitTyping(true);
     return true;
   }
@@ -8126,7 +8169,12 @@ const parser::Name *DeclarationVisitor::ResolveDataRef(
 // If implicit types are allowed, ensure name is in the symbol table.
 // Otherwise, report an error if it hasn't been declared.
 const parser::Name *DeclarationVisitor::ResolveName(const parser::Name &name) {
-  FindSymbol(name);
+  if (!FindSymbol(name)) {
+    if (FindAndMarkDeclareTargetSymbol(name)) {
+      return &name;
+    }
+  }
+
   if (CheckForHostAssociatedImplicit(name)) {
     NotePossibleBadForwardRef(name);
     return &name;
@@ -8313,6 +8361,47 @@ const parser::Name *DeclarationVisitor::FindComponent(
   return nullptr;
 }
 
+bool DeclarationVisitor::FindAndMarkDeclareTargetSymbol(
+    const parser::Name &name) {
+  if (!specPartState_.declareTargetNames.empty()) {
+    if (specPartState_.declareTargetNames.count(name.source)) {
+      if (!currScope().IsTopLevel()) {
+        // Search the enclosing scopes until we find a matching symbol or run
+        // out of scopes to search. The current scope is skipped as it's
+        // already been designated as implicit here.
+        for (auto *scope = &currScope().parent();; scope = &scope->parent()) {
+          if (Symbol * symbol{scope->FindSymbol(name.source)}) {
+            if (symbol->test(Symbol::Flag::Subroutine) ||
+                symbol->test(Symbol::Flag::Function)) {
+              const auto [sym, success]{currScope().try_emplace(
+                  symbol->name(), Attrs{}, HostAssocDetails{*symbol})};
+              assert(success &&
+                  "FindAndMarkDeclareTargetSymbol could not emplace new "
+                  "subroutine/function symbol");
+              name.symbol = &*sym->second; // Bind to the new symbol.
+              symbol->test(Symbol::Flag::Subroutine)
+                  ? name.symbol->set(Symbol::Flag::Subroutine)
+                  : name.symbol->set(Symbol::Flag::Function);
+              return true;
+            }
+            // If we find a symbol that is not a function or subroutine, we
+            // currently bail out without doing anything.
+            break;
+          }
+
+          // This is our loop exit condition, as parent() has an inbuilt assert
+          // if you call it on a top level scope, rather than returning a null
+          // value.
+          if (scope->IsTopLevel()) {
+            return false;
+          }
+        }
+      }
+    }
+  }
+  return false; // No enclosing subroutine/function symbol was bound.
+}
+
 void DeclarationVisitor::Initialization(const parser::Name &name,
     const parser::Initialization &init, bool inComponentDecl) {
   // Traversal of the initializer was deferred to here so that the
diff --git a/flang/test/Fir/CUDA/cuda-code-gen.mlir b/flang/test/Fir/CUDA/cuda-code-gen.mlir
index 3ad28fa7bd517..7ac89836a3ff1 100644
--- a/flang/test/Fir/CUDA/cuda-code-gen.mlir
+++ b/flang/test/Fir/CUDA/cuda-code-gen.mlir
@@ -170,3 +170,20 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<!llvm.ptr<270> = dense<32> : vec
 
 // CHECK-LABEL: llvm.func @_QQmain()
 // CHECK-COUNT-3: llvm.call @_FortranACUFAllocDescriptor
+
+// -----
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<!llvm.ptr<270> = dense<32> : vector<4xi64>, f128 = dense<128> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, i64 = dense<64> : vector<2xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, f80 = dense<128> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, "dlti.endianness" = "little", "dlti.stack_alignment" = 128 : i64>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 20.0.0 (git@github.com:clementval/llvm-project.git efc2415bcce8e8a9e73e77aa122c8aba1c1fbbd2)", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
+  func.func @_QPouter(%arg0: !fir.ref<!fir.array<100x100xf64>> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "a"}) {
+    %c0_i32 = arith.constant 0 : i32
+    %c100 = arith.constant 100 : index
+    %0 = fir.alloca tuple<!fir.box<!fir.array<100x100xf64>>>
+    %1 = fir.coordinate_of %0, %c0_i32 : (!fir.ref<tuple<!fir.box<!fir.array<100x100xf64>>>>, i32) -> !fir.ref<!fir.box<!fir.array<100x100xf64>>>
+    %2 = fircg.ext_embox %arg0(%c100, %c100) : (!fir.ref<!fir.array<100x100xf64>>, index, index) -> !fir.box<!fir.array<100x100xf64>>
+    fir.store %2 to %1 : !fir.ref<!fir.box<!fir.array<100x100xf64>>>
+    return
+  }
+}
+
+// CHECK-LABEL: llvm.func @_QPouter
+// CHECK: _FortranACUFAllocDescriptor
diff --git a/flang/test/HLFIR/unroll-loops.fir b/flang/test/HLFIR/unroll-loops.fir
index d8f820263ffd0..4494cfa570dd7 100644
--- a/flang/test/HLFIR/unroll-loops.fir
+++ b/flang/test/HLFIR/unroll-loops.fir
@@ -4,7 +4,7 @@
 // RUN: %flang_fc1 -emit-llvm -O1 -mllvm -force-vector-width=2 -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL
 
 // FIXME: https://github.com/llvm/llvm-project/issues/123668
-// XFAIL: powerpc64-target-arch
+// XFAIL: target=powerpc64{{.*}}
 
 // CHECK-LABEL: @unroll
 // CHECK-SAME: (ptr nocapture writeonly %[[ARG0:.*]])
diff --git a/flang/test/Integration/unroll-loops.f90 b/flang/test/Integration/unroll-loops.f90
index 4a356c1ec5e9a..4b4a394502881 100644
--- a/flang/test/Integration/unroll-loops.f90
+++ b/flang/test/Integration/unroll-loops.f90
@@ -4,7 +4,7 @@
 ! RUN: %flang_fc1 -emit-llvm -O1 -mllvm -force-vector-width=2 -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL
 
 ! FIXME: https://github.com/llvm/llvm-project/issues/123668
-! XFAIL: powerpc64-target-arch
+! XFAIL: target=powerpc64{{.*}}
 
 ! CHECK-LABEL: @unroll
 ! CHECK-SAME: (ptr nocapture writeonly %[[ARG0:.*]])
diff --git a/flang/test/Lower/OpenMP/DelayedPrivatization/wsloop.f90 b/flang/test/Lower/OpenMP/DelayedPrivatization/wsloop.f90
index c98850b8000d3..66fd120085c78 100644
--- a/flang/test/Lower/OpenMP/DelayedPrivatization/wsloop.f90
+++ b/flang/test/Lower/OpenMP/DelayedPrivatization/wsloop.f90
@@ -1,6 +1,6 @@
-! RUN: %flang_fc1 -emit-hlfir -fopenmp \
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization-staging \
 ! RUN:   -o - %s 2>&1 | FileCheck %s
-! RUN: bbc -emit-hlfir -fopenmp  -o - %s 2>&1 \
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization-staging -o - %s 2>&1 \
 ! RUN:   | FileCheck %s
 
 subroutine wsloop_private
diff --git a/flang/test/Lower/OpenMP/Todo/allocate-clause-allocator.f90 b/flang/test/Lower/OpenMP/Todo/allocate-clause-allocator.f90
index 90158a61f7503..45b3aa8ee77f9 100644
--- a/flang/test/Lower/OpenMP/Todo/allocate-clause-allocator.f90
+++ b/flang/test/Lower/OpenMP/Todo/allocate-clause-allocator.f90
@@ -1,11 +1,10 @@
 ! REQUIRES: openmp_runtime
-! RUN: %not_todo_cmd %flang_fc1 -emit-llvm -fopenmp -fopenmp-version=51 -o - %s 2>&1 | FileCheck %s
+! RUN: %not_todo_cmd %flang_fc1 -emit-llvm %openmp_flags -fopenmp-version=51 -o - %s 2>&1 | FileCheck %s
 
 ! CHECK: not yet implemented: Unhandled clause allocate in omp.parallel
 ! CHECK: LLVM Translation failed for operation: omp.parallel
 program p
-  !use omp_lib
-  integer(8),parameter::omp_default_mem_alloc=1_8
+  use omp_lib
   integer :: x
   integer :: a
   integer :: i
diff --git a/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90 b/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90
index 10879c53dc0c5..77a1304f39a48 100644
--- a/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90
+++ b/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90
@@ -9,10 +9,11 @@
 ! The string "EXPECTED" denotes the expected FIR
 
 ! CHECK: omp.parallel  private(@{{.*}} %{{.*}} -> %[[PRIVATE_Y:.*]], @{{.*}} %{{.*}} -> %[[PRIVATE_Y:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
+! CHECK: %[[TEMP:.*]] = fir.alloca i32 {bindc_name = "x", pinned, {{.*}}}
 ! CHECK: %[[const_1:.*]] = arith.constant 1 : i32
 ! CHECK: %[[const_2:.*]] = arith.constant 10 : i32
 ! CHECK: %[[const_3:.*]] = arith.constant 1 : i32
-! CHECK: omp.wsloop private(@{{.*}} %{{.*}} -> %[[TEMP:.*]] : !fir.ref<i32>) {
+! CHECK: omp.wsloop {
 ! CHECK-NEXT: omp.loop_nest (%[[ARG:.*]]) : i32 = (%[[const_1]]) to (%[[const_2]]) inclusive step (%[[const_3]]) {
 ! CHECK: fir.store %[[ARG]] to %[[TEMP]] : !fir.ref<i32>
 ! EXPECTED: %[[temp_1:.*]] = fir.load %[[PRIVATE_Z]] : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/associate.f90 b/flang/test/Lower/OpenMP/associate.f90
index d497b4ade782e..4964890a6842c 100644
--- a/flang/test/Lower/OpenMP/associate.f90
+++ b/flang/test/Lower/OpenMP/associate.f90
@@ -6,12 +6,12 @@
 !CHECK:         omp.parallel {
 !CHECK-NOT:       hlfir.declare {{.*}} {uniq_name = "_QFtest_parallel_assocEa"}
 !CHECK-NOT:       hlfir.declare {{.*}} {uniq_name = "_QFtest_parallel_assocEb"}
-!CHECK:           omp.wsloop private({{.*}}) {
+!CHECK:           omp.wsloop {
 !CHECK:           }
 !CHECK:         }
 !CHECK:         omp.parallel {{.*}} {
 !CHECK-NOT:       hlfir.declare {{.*}} {uniq_name = "_QFtest_parallel_assocEb"}
-!CHECK:           omp.wsloop private({{.*}}) {
+!CHECK:           omp.wsloop {
 !CHECK:           }
 !CHECK:         }
 subroutine test_parallel_assoc()
diff --git a/flang/test/Lower/OpenMP/copyin.f90 b/flang/test/Lower/OpenMP/copyin.f90
index 5ad45f1f5ba6f..9e9ccf8e3d914 100644
--- a/flang/test/Lower/OpenMP/copyin.f90
+++ b/flang/test/Lower/OpenMP/copyin.f90
@@ -154,13 +154,14 @@ subroutine copyin_derived_type()
 
 ! CHECK:             omp.barrier
 
+! CHECK:             %[[VAL_6:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFcombined_parallel_worksharing_loopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_12:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref<i32>
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}} -> %[[VAL_6:.*]] : !fir.ref<i32>) {
+! CHECK:             omp.wsloop {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_14:.*]]) : i32 = (%[[VAL_11]]) to (%[[VAL_12]]) inclusive step (%[[VAL_13]]) {
-! CHECK:                 %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFcombined_parallel_worksharing_loopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 fir.store %[[VAL_14]] to %[[VAL_7]]#1 : !fir.ref<i32>
 ! CHECK:                 fir.call @_QPsub4(%[[VAL_9]]#1) fastmath<contract> : (!fir.ref<i32>) -> ()
 ! CHECK:                 omp.yield
@@ -320,12 +321,15 @@ subroutine common_1()
 ! CHECK:             %[[VAL_33:.*]] = fir.load %[[VAL_18]]#0 : !fir.ref<i32>
 ! CHECK:             hlfir.assign %[[VAL_33]] to %[[VAL_31]]#0 : i32, !fir.ref<i32>
 ! CHECK:             omp.barrier
+
+! CHECK:             %[[VAL_19:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFcommon_2Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
 ! CHECK:             %[[VAL_34:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_35:.*]] = fir.load %[[VAL_26]]#0 : !fir.ref<i32>
 ! CHECK:             %[[VAL_36:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}} -> %[[VAL_19:.*]] : !fir.ref<i32>) {
+! CHECK:             omp.wsloop {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_37:.*]]) : i32 = (%[[VAL_34]]) to (%[[VAL_35]]) inclusive step (%[[VAL_36]]) {
-! CHECK:             %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFcommon_2Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 fir.store %[[VAL_37]] to %[[VAL_20]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_38:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_39:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/critical.f90 b/flang/test/Lower/OpenMP/critical.f90
index 99a4426ab0453..051d378210646 100644
--- a/flang/test/Lower/OpenMP/critical.f90
+++ b/flang/test/Lower/OpenMP/critical.f90
@@ -38,10 +38,11 @@ subroutine predetermined_privatization()
   !CHECK: omp.parallel
   !$omp parallel do
 
+  !CHECK: %[[PRIV_I_ALLOC:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+  !CHECK: %[[PRIV_I_DECL:.*]]:2 = hlfir.declare %[[PRIV_I_ALLOC]]
   do i = 2, 10
-    !CHECK: omp.wsloop private(@{{.*}} %{{.*}} -> %[[PRIV_I_ALLOC:.*]] : !fir.ref<i32>)
+    !CHECK: omp.wsloop
     !CHECK: omp.loop_nest (%[[IV:[^[:space:]]+]])
-    !CHECK: %[[PRIV_I_DECL:.*]]:2 = hlfir.declare %[[PRIV_I_ALLOC]]
     !CHECK: fir.store %[[IV]] to %[[PRIV_I_DECL]]#1
     !CHECK: omp.critical
     !$omp critical
diff --git a/flang/test/Lower/OpenMP/default-clause-byref.f90 b/flang/test/Lower/OpenMP/default-clause-byref.f90
index 10e62005f42ba..654c13ada9e39 100644
--- a/flang/test/Lower/OpenMP/default-clause-byref.f90
+++ b/flang/test/Lower/OpenMP/default-clause-byref.f90
@@ -346,7 +346,7 @@ subroutine skipped_default_clause_checks()
        type(it)::iii
 
 !CHECK: omp.parallel {{.*}} {
-!CHECK: omp.wsloop private({{.*}}) reduction(byref @min_byref_i32 %[[VAL_Z_DECLARE]]#0 -> %[[PRV:.+]] : !fir.ref<i32>) {
+!CHECK: omp.wsloop reduction(byref @min_byref_i32 %[[VAL_Z_DECLARE]]#0 -> %[[PRV:.+]] : !fir.ref<i32>) {
 !CHECK-NEXT: omp.loop_nest (%[[ARG:.*]]) {{.*}} {
 !CHECK: omp.yield
 !CHECK: }
diff --git a/flang/test/Lower/OpenMP/default-clause.f90 b/flang/test/Lower/OpenMP/default-clause.f90
index fcc8d033eea0f..c004813a911f7 100644
--- a/flang/test/Lower/OpenMP/default-clause.f90
+++ b/flang/test/Lower/OpenMP/default-clause.f90
@@ -284,13 +284,16 @@ subroutine nested_default_clause_test4
 !CHECK-LABEL: func @_QPnested_default_clause_test5
 !CHECK: omp.parallel {
 
+!CHECK: %[[X_ALLOCA:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFnested_default_clause_test5Ex"}
+!CHECK: %[[X_DECLARE:.*]]:2 = hlfir.declare %[[X_ALLOCA]] {{.*}}
+
+!CHECK: %[[LOOP_VAR_ALLOCA:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+!CHECK: %[[LOOP_VAR_DECLARE:.*]]:2 = hlfir.declare %[[LOOP_VAR_ALLOCA]] {{.*}}
+
 !CHECK: %[[CONST_LB:.*]] = arith.constant 1 : i32
 !CHECK: %[[CONST_UB:.*]] = arith.constant 50 : i32
 !CHECK: %[[CONST_STEP:.*]] = arith.constant 1 : i32
-! CHECK: omp.wsloop private(@{{.*}} %{{.*}} -> %[[X_ALLOCA:.*]], @{{.*}} %{{.*}} -> %[[LOOP_VAR_ALLOCA:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
 !CHECK: omp.loop_nest (%[[ARG:.*]]) : i32 = (%[[CONST_LB]]) to (%[[CONST_UB]]) inclusive step (%[[CONST_STEP]]) {
-!CHECK: %[[X_DECLARE:.*]]:2 = hlfir.declare %[[X_ALLOCA]] {{.*}}
-!CHECK: %[[LOOP_VAR_DECLARE:.*]]:2 = hlfir.declare %[[LOOP_VAR_ALLOCA]] {{.*}}
 !CHECK: fir.store %[[ARG]] to %[[LOOP_VAR_DECLARE]]#1 : !fir.ref<i32>
 !CHECK: %[[LOADED_X:.*]] = fir.load %[[X_DECLARE]]#0 : !fir.ref<i32>
 !CHECK: %[[CONST:.*]] = arith.constant 1 : i32
@@ -318,12 +321,13 @@ subroutine nested_default_clause_test5
 
 !CHECK: %[[Z_VAR_DECLARE:.*]]:2 = hlfir.declare %[[Z_VAR]] {{.*}}
 
+!CHECK: %[[LOOP_VAR:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+!CHECK: %[[LOOP_VAR_DECLARE:.*]]:2 = hlfir.declare %[[LOOP_VAR]] {{.*}}
+
 !CHECK: %[[CONST_LB:.*]] = arith.constant 1 : i32
 !CHECK: %[[CONST_UB:.*]] = arith.constant 10 : i32
 !CHECK: %[[CONST_STEP:.*]] = arith.constant 1 : i32
-! CHECK: omp.wsloop private(@{{.*}} %{{.*}} -> %[[LOOP_VAR:.*]] : !fir.ref<i32>) {
 !CHECK: omp.loop_nest (%[[ARG:.*]]) : i32 = (%[[CONST_LB]]) to (%[[CONST_UB]]) inclusive step (%[[CONST_STEP]]) {
-!CHECK: %[[LOOP_VAR_DECLARE:.*]]:2 = hlfir.declare %[[LOOP_VAR]] {{.*}}
 !CHECK: fir.store %[[ARG]] to %[[LOOP_VAR_DECLARE]]#1 : !fir.ref<i32>
 !CHECK: %[[LOADED_X:.*]] = fir.load %[[X_VAR_DECLARE]]#0 : !fir.ref<i32>
 !CHECK: %[[CONST:.*]] = arith.constant 1 : i32
@@ -382,7 +386,7 @@ subroutine skipped_default_clause_checks()
        type(it)::iii
 
 !CHECK: omp.parallel {{.*}} {
-!CHECK: omp.wsloop private({{.*}}) reduction(@min_i32 %[[VAL_Z_DECLARE]]#0 -> %[[PRV:.+]] : !fir.ref<i32>) {
+!CHECK: omp.wsloop reduction(@min_i32 %[[VAL_Z_DECLARE]]#0 -> %[[PRV:.+]] : !fir.ref<i32>) {
 !CHECK-NEXT: omp.loop_nest (%[[ARG:.*]]) {{.*}} {
 !CHECK: omp.yield
 !CHECK: }
diff --git a/flang/test/Lower/OpenMP/hlfir-wsloop.f90 b/flang/test/Lower/OpenMP/hlfir-wsloop.f90
index 786ab916d000c..f7b0ba681efeb 100644
--- a/flang/test/Lower/OpenMP/hlfir-wsloop.f90
+++ b/flang/test/Lower/OpenMP/hlfir-wsloop.f90
@@ -10,11 +10,12 @@ subroutine simple_loop
   ! CHECK-DAG:     %[[WS_END:.*]] = arith.constant 9 : i32
   ! CHECK:  omp.parallel
   !$OMP PARALLEL
-  ! CHECK:         omp.wsloop private(@{{.*}} %{{.*}} -> %[[ALLOCA_IV:.*]] : !fir.ref<i32>) {
+  ! CHECK-DAG:     %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned, {{.*}}}
+  ! CHECK:         %[[IV:.*]]    = fir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loopEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+  ! CHECK:         omp.wsloop {
   ! CHECK-NEXT:      omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_ST]]) to (%[[WS_END]]) inclusive step (%[[WS_ST]]) {
   !$OMP DO
   do i=1, 9
-  ! CHECK:         %[[IV:.*]]    = fir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loopEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
   ! CHECK:             fir.store %[[I]] to %[[IV:.*]] : !fir.ref<i32>
   ! CHECK:             %[[LOAD_IV:.*]] = fir.load %[[IV]] : !fir.ref<i32>
   ! CHECK:             fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
diff --git a/flang/test/Lower/OpenMP/lastprivate-allocatable.f90 b/flang/test/Lower/OpenMP/lastprivate-allocatable.f90
index fd8338393dd88..6b7d849fde93c 100644
--- a/flang/test/Lower/OpenMP/lastprivate-allocatable.f90
+++ b/flang/test/Lower/OpenMP/lastprivate-allocatable.f90
@@ -8,11 +8,12 @@
 ! CHECK:           fir.store %[[VAL_2]] to %[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<i32>>>
 ! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] {fortran_attrs = {{.*}}<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> (!fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.heap<i32>>>)
 ! CHECK:           omp.parallel {
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}} -> %{{.*}}, @{{.*}} %{{.*}} -> %[[VAL_17:.*]] : !fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<i32>) {
+!                    create original copy of private variable
+! CHECK:             %[[VAL_16:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = {{.*}}<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> (!fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.heap<i32>>>)
+! CHECK:             %[[VAL_17:.*]] = fir.alloca i32 {bindc_name = "i", pinned, uniq_name = "_QFEi"}
+! CHECK:             %[[VAL_18:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             omp.wsloop {
 ! CHECK:               omp.loop_nest
-! CHECK:                   %[[VAL_16:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = {{.*}}<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> (!fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.heap<i32>>>)
-! CHECK:                   %[[VAL_18:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-
 !                        [...]
 !                        if this is the last iteration
 ! CHECK:                 fir.if %{{.*}} {
diff --git a/flang/test/Lower/OpenMP/lastprivate-commonblock.f90 b/flang/test/Lower/OpenMP/lastprivate-commonblock.f90
index c059382bf634c..faa3d3e053f34 100644
--- a/flang/test/Lower/OpenMP/lastprivate-commonblock.f90
+++ b/flang/test/Lower/OpenMP/lastprivate-commonblock.f90
@@ -11,10 +11,12 @@
 !CHECK:      %[[CB_C_Y_COOR:.*]] = fir.coordinate_of %[[CB_C_REF_CVT]], %{{.*}} : (!fir.ref<!fir.array<?xi8>>, index) -> !fir.ref<i8>
 !CHECK:      %[[CB_C_Y_ADDR:.*]] = fir.convert %[[CB_C_Y_COOR]] : (!fir.ref<i8>) -> !fir.ref<f32>
 !CHECK:      %[[Y_DECL:.*]]:2 = hlfir.declare %[[CB_C_Y_ADDR]] {uniq_name = "_QFlastprivate_commonEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-!CHECK:      omp.wsloop private(@{{.*}} %{{.*}} -> %[[PRIVATE_X_REF:.*]], @{{.*}} %{{.*}} -> %[[PRIVATE_Y_REF:.*]], @{{.*}} %{{.*}} -> %{{.*}} : !{{.*}}, !{{.*}}, !{{.*}}) {
-!CHECK-NEXT:   omp.loop_nest (%[[I:.*]]) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) {
+!CHECK:      %[[PRIVATE_X_REF:.*]] = fir.alloca f32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivate_commonEx"}
 !CHECK:      %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X_REF]] {uniq_name = "_QFlastprivate_commonEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK:      %[[PRIVATE_Y_REF:.*]] = fir.alloca f32 {bindc_name = "y", pinned, uniq_name = "_QFlastprivate_commonEy"}
 !CHECK:      %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y_REF]] {uniq_name = "_QFlastprivate_commonEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK:      omp.wsloop {
+!CHECK-NEXT:   omp.loop_nest (%[[I:.*]]) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) {
 !CHECK:          %[[V:.*]] = arith.addi %[[I]], %{{.*}} : i32
 !CHECK:          %[[C0:.*]] = arith.constant 0 : i32
 !CHECK:          %[[NEG_STEP:.*]] = arith.cmpi slt, %{{.*}}, %[[C0]] : i32
diff --git a/flang/test/Lower/OpenMP/lastprivate-iv.f90 b/flang/test/Lower/OpenMP/lastprivate-iv.f90
index aacefd8b59c0f..63a81e818bc8b 100644
--- a/flang/test/Lower/OpenMP/lastprivate-iv.f90
+++ b/flang/test/Lower/OpenMP/lastprivate-iv.f90
@@ -6,12 +6,14 @@
 !CHECK:      %[[I2_MEM:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFlastprivate_iv_incEi"}
 !CHECK:      %[[I2:.*]]:2 = hlfir.declare %[[I2_MEM]] {uniq_name = "_QFlastprivate_iv_incEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 
+!CHECK:      %[[I_MEM:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+!CHECK:      %[[I:.*]]:2 = hlfir.declare %[[I_MEM]] {uniq_name = "_QFlastprivate_iv_incEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
 !CHECK:      %[[LB:.*]] = arith.constant 4 : i32
 !CHECK:      %[[UB:.*]] = arith.constant 10 : i32
 !CHECK:      %[[STEP:.*]]  = arith.constant 3 : i32
-!CHECK:      omp.wsloop private(@{{.*}} %{{.*}} -> %[[I_MEM:.*]] : !fir.ref<i32>) {
+!CHECK:      omp.wsloop {
 !CHECK-NEXT:   omp.loop_nest (%[[IV:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
-!CHECK:          %[[I:.*]]:2 = hlfir.declare %[[I_MEM]] {uniq_name = "_QFlastprivate_iv_incEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK:          fir.store %[[IV]] to %[[I]]#1 : !fir.ref<i32>
 !CHECK:          %[[V:.*]] = arith.addi %[[IV]], %[[STEP]] : i32
 !CHECK:          %[[C0:.*]] = arith.constant 0 : i32
@@ -40,12 +42,15 @@ subroutine lastprivate_iv_inc()
 
 !CHECK:      %[[I2_MEM:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFlastprivate_iv_decEi"}
 !CHECK:      %[[I2:.*]]:2 = hlfir.declare %[[I2_MEM]] {uniq_name = "_QFlastprivate_iv_decEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
+!CHECK:      %[[I_MEM:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+!CHECK:      %[[I:.*]]:2 = hlfir.declare %[[I_MEM]] {uniq_name = "_QFlastprivate_iv_decEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
 !CHECK:      %[[LB:.*]] = arith.constant 10 : i32
 !CHECK:      %[[UB:.*]] = arith.constant 1 : i32
 !CHECK:      %[[STEP:.*]]  = arith.constant -3 : i32
-!CHECK:      omp.wsloop private(@{{.*}} %{{.*}} -> %[[I_MEM:.*]] : !fir.ref<i32>) {
+!CHECK:      omp.wsloop {
 !CHECK-NEXT:   omp.loop_nest (%[[IV:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
-!CHECK:          %[[I:.*]]:2 = hlfir.declare %[[I_MEM]] {uniq_name = "_QFlastprivate_iv_decEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK:          fir.store %[[IV]] to %[[I]]#1 : !fir.ref<i32>
 !CHECK:          %[[V:.*]] = arith.addi %[[IV]], %[[STEP]] : i32
 !CHECK:          %[[C0:.*]] = arith.constant 0 : i32
@@ -75,7 +80,7 @@ subroutine lastprivate_iv_dec()
 subroutine lastprivate_iv_i1
   integer*1 :: i1
   i1=0
-!CHECK:    omp.wsloop private({{.*}})
+!CHECK:    omp.wsloop
 !CHECK:      omp.loop_nest
 !CHECK:        fir.if %{{.*}} {
 !CHECK:          %[[I8_VAL:.*]] = fir.convert %{{.*}} : (i32) -> i8
diff --git a/flang/test/Lower/OpenMP/location.f90 b/flang/test/Lower/OpenMP/location.f90
index fc7dd43499863..2dab22a1c1f90 100644
--- a/flang/test/Lower/OpenMP/location.f90
+++ b/flang/test/Lower/OpenMP/location.f90
@@ -28,7 +28,7 @@ subroutine sub_target()
 
 !CHECK-LABEL: sub_loop
 subroutine sub_loop()
-!CHECK: omp.wsloop private({{.*}}) {
+!CHECK: omp.wsloop {
 !CHECK-NEXT: omp.loop_nest {{.*}} {
   !$omp do
   do i=1,10
diff --git a/flang/test/Lower/OpenMP/loop-directive.f90 b/flang/test/Lower/OpenMP/loop-directive.f90
index 4b4d640e449ee..9fa0de3bfe171 100644
--- a/flang/test/Lower/OpenMP/loop-directive.f90
+++ b/flang/test/Lower/OpenMP/loop-directive.f90
@@ -11,7 +11,7 @@
 subroutine test_no_clauses()
   integer :: i, j, dummy = 1
 
-  ! CHECK: omp.loop private(@[[I_PRIV]] %{{.*}}#0 -> %[[ARG:.*]] : !fir.ref<i32>) {
+  ! CHECK: omp.simd private(@[[I_PRIV]] %{{.*}}#0 -> %[[ARG:.*]] : !fir.ref<i32>) {
   ! CHECK-NEXT:   omp.loop_nest (%[[IV:.*]]) : i32 = (%{{.*}}) to (%{{.*}}) {{.*}} {
   ! CHECK:          %[[ARG_DECL:.*]]:2 = hlfir.declare %[[ARG]]
   ! CHECK:          fir.store %[[IV]] to %[[ARG_DECL]]#1 : !fir.ref<i32>
@@ -27,7 +27,7 @@ subroutine test_no_clauses()
 ! CHECK-LABEL: func.func @_QPtest_collapse
 subroutine test_collapse()
   integer :: i, j, dummy = 1
-  ! CHECK: omp.loop private(@{{.*}} %{{.*}}#0 -> %{{.*}}, @{{.*}} %{{.*}}#0 -> %{{.*}} : {{.*}}) {
+  ! CHECK: omp.simd private(@{{.*}} %{{.*}}#0 -> %{{.*}}, @{{.*}} %{{.*}}#0 -> %{{.*}} : {{.*}}) {
   ! CHECK-NEXT:   omp.loop_nest (%{{.*}}, %{{.*}}) : i32 {{.*}} {
   ! CHECK:        }
   ! CHECK: }
@@ -43,7 +43,7 @@ subroutine test_collapse()
 ! CHECK-LABEL: func.func @_QPtest_private
 subroutine test_private()
   integer :: i, dummy = 1
-  ! CHECK: omp.loop private(@[[DUMMY_PRIV]] %{{.*}}#0 -> %[[DUMMY_ARG:.*]], @{{.*}} %{{.*}}#0 -> %{{.*}} : {{.*}}) {
+  ! CHECK: omp.simd private(@[[DUMMY_PRIV]] %{{.*}}#0 -> %[[DUMMY_ARG:.*]], @{{.*}} %{{.*}}#0 -> %{{.*}} : {{.*}}) {
   ! CHECK-NEXT:   omp.loop_nest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) {{.*}} {
   ! CHECK:          %[[DUMMY_DECL:.*]]:2 = hlfir.declare %[[DUMMY_ARG]] {uniq_name = "_QFtest_privateEdummy"}
   ! CHECK:          %{{.*}} = fir.load %[[DUMMY_DECL]]#0
@@ -100,3 +100,42 @@ subroutine test_bind()
   end do
   !$omp end loop
 end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_nested_directives
+subroutine test_nested_directives
+  implicit none
+  integer, parameter :: N = 100000
+  integer a(N), b(N), c(N)
+  integer j,i, num, flag;
+  num = N
+
+  ! CHECK: omp.teams {
+
+  ! Verify the first `loop` directive was combined with `target teams` into 
+  ! `target teams distribute parallel do`.
+  ! CHECK:   omp.parallel {{.*}} {
+  ! CHECK:     omp.distribute {
+  ! CHECK:       omp.wsloop {
+  ! CHECK:         omp.loop_nest {{.*}} {
+
+  ! Verify the second `loop` directive was rewritten to `simd`.
+  ! CHECK:           omp.simd {{.*}} {
+  ! CHECK:             omp.loop_nest {{.*}} {
+  ! CHECK:             }
+  ! CHECK:           }
+
+  ! CHECK:         }
+  ! CHECK:       } {omp.composite}
+  ! CHECK:     } {omp.composite}
+  ! CHECK:   } {omp.composite}
+  ! CHECK: }
+  !$omp target teams map(to: a,b) map(from: c)
+  !$omp loop
+  do j=1,1000
+    !$omp loop
+    do i=1,N
+      c(i) = a(i) * b(i)
+    end do
+  end do
+  !$omp end target teams
+end subroutine
diff --git a/flang/test/Lower/OpenMP/order-clause.f90 b/flang/test/Lower/OpenMP/order-clause.f90
index 1f678e02708da..a30d82979021d 100644
--- a/flang/test/Lower/OpenMP/order-clause.f90
+++ b/flang/test/Lower/OpenMP/order-clause.f90
@@ -20,15 +20,15 @@ end subroutine simd_order
 
 !CHECK-LABEL:   func.func @_QPdo_order() {
 subroutine do_order
-   !CHECK: omp.wsloop order(reproducible:concurrent) private({{.*}}) {
+   !CHECK: omp.wsloop order(reproducible:concurrent) {
    !$omp do order(concurrent)
    do i = 1, 10
    end do
-   !CHECK: omp.wsloop order(reproducible:concurrent) private({{.*}}) {
+   !CHECK: omp.wsloop order(reproducible:concurrent) {
    !$omp do order(reproducible:concurrent)
    do i = 1, 10
    end do
-   !CHECK: omp.wsloop order(unconstrained:concurrent) private({{.*}}) {
+   !CHECK: omp.wsloop order(unconstrained:concurrent) {
    !$omp do order(unconstrained:concurrent)
    do i = 1, 10
    end do
diff --git a/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90 b/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90
index 531413c124f81..86309a24f91a0 100644
--- a/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90
+++ b/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90
@@ -10,12 +10,12 @@
 !CHECK-DAG: %[[ARG1_DECL:.*]]:2 = hlfir.declare %[[ARG1_REF]] typeparams %[[FIVE]] dummy_scope %{{[0-9]+}} {uniq_name = "_QFlastprivate_characterEarg1"} : (!fir.ref<!fir.char<1,5>>, index, !fir.dscope) -> (!fir.ref<!fir.char<1,5>>, !fir.ref<!fir.char<1,5>>)
 
 !CHECK: omp.parallel {
+!CHECK-DAG: %[[ARG1_PVT:.*]] = fir.alloca !fir.char<1,5> {bindc_name = "arg1", pinned, {{.*}}}
+!CHECK-DAG: %[[ARG1_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG1_PVT]] typeparams %[[FIVE]] {uniq_name = "_QFlastprivate_characterEarg1"} : (!fir.ref<!fir.char<1,5>>, index) -> (!fir.ref<!fir.char<1,5>>, !fir.ref<!fir.char<1,5>>)
 
 ! Check that we are accessing the clone inside the loop
-!CHECK: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[ARG1_PVT:.*]], @{{.*}} %{{.*}}#0 -> %{{.*}} : !fir.ref<!fir.char<1,5>>, !{{.*}}) {
+!CHECK: omp.wsloop {
 !CHECK-NEXT: omp.loop_nest (%[[INDX_WS:.*]]) : {{.*}} {
-!CHECK: %[[FIVE:.*]] = arith.constant 5 : index
-!CHECK: %[[ARG1_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG1_PVT]] typeparams %[[FIVE]] {uniq_name = "_QFlastprivate_characterEarg1"} : (!fir.ref<!fir.char<1,5>>, index) -> (!fir.ref<!fir.char<1,5>>, !fir.ref<!fir.char<1,5>>)
 !CHECK: %[[UNIT:.*]] = arith.constant 6 : i32
 !CHECK-NEXT: %[[ADDR:.*]] = fir.address_of(@_QQclX
 !CHECK-NEXT: %[[CVT0:.*]] = fir.convert %[[ADDR]] 
@@ -58,9 +58,10 @@ subroutine lastprivate_character(arg1)
 !CHECK: func @_QPlastprivate_int(%[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "arg1"}) {
 !CHECK: %[[ARG1_DECL:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %{{[0-9]+}} {uniq_name = "_QFlastprivate_intEarg1"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK-DAG: omp.parallel  {
-!CHECK: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[CLONE:.*]], @{{.*}} %{{.*}}#0 -> %{{.*}} : !fir.ref<i32>, !{{.*}}) {
+!CHECK-DAG: %[[CLONE:.*]] = fir.alloca i32 {bindc_name = "arg1", pinned, {{.*}}}
+!CHECK-DAG: %[[CLONE_DECL:.*]]:2 = hlfir.declare %[[CLONE]] {uniq_name = "_QFlastprivate_intEarg1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: omp.wsloop {
 !CHECK-NEXT: omp.loop_nest (%[[INDX_WS:.*]]) : {{.*}} {
-!CHECK:      %[[CLONE_DECL:.*]]:2 = hlfir.declare %[[CLONE]] {uniq_name = "_QFlastprivate_intEarg1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 
 ! Testing last iteration check
 !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32
@@ -97,10 +98,12 @@ subroutine lastprivate_int(arg1)
 !CHECK: %[[ARG1_DECL:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %{{[0-9]+}} {uniq_name = "_QFmult_lastprivate_intEarg1"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: %[[ARG2_DECL:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %{{[0-9]+}} {uniq_name = "_QFmult_lastprivate_intEarg2"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: omp.parallel  {
-!CHECK: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[CLONE1:.*]], @{{.*}} %{{.*}}#0 -> %[[CLONE2:.*]], @{{.*}} %{{.*}}#0 -> %{{.*}} : !fir.ref<i32>, !fir.ref<i32>, !{{.*}}) {
-!CHECK-NEXT: omp.loop_nest (%[[INDX_WS:.*]]) : {{.*}} {
+!CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1", pinned, {{.*}}}
 !CHECK-DAG: %[[CLONE1_DECL:.*]]:2 = hlfir.declare %[[CLONE1]] {uniq_name = "_QFmult_lastprivate_intEarg1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2", pinned, {{.*}}}
 !CHECK-DAG: %[[CLONE2_DECL:.*]]:2 = hlfir.declare %[[CLONE2]] {uniq_name = "_QFmult_lastprivate_intEarg2"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: omp.wsloop {
+!CHECK-NEXT: omp.loop_nest (%[[INDX_WS:.*]]) : {{.*}} {
 
 ! Testing last iteration check
 !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32
@@ -139,10 +142,12 @@ subroutine mult_lastprivate_int(arg1, arg2)
 !CHECK: %[[ARG1_DECL:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %{{[0-9]+}} {uniq_name = "_QFmult_lastprivate_int2Earg1"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: %[[ARG2_DECL:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %{{[0-9]+}} {uniq_name = "_QFmult_lastprivate_int2Earg2"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: omp.parallel  {
-!CHECK: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[CLONE1:.*]], @{{.*}} %{{.*}}#0 -> %[[CLONE2:.*]], @{{.*}} %{{.*}}#0 -> %{{.*}} : !fir.ref<i32>, !fir.ref<i32>, !{{.*}}) {
-!CHECK-NEXT: omp.loop_nest (%[[INDX_WS:.*]]) : {{.*}} {
-!CHECK-DAG: %[[CLONE1_DECL:.*]]:2 = hlfir.declare %[[CLONE1]] {uniq_name = "_QFmult_lastprivate_int2Earg1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2", pinned, {{.*}}}
 !CHECK-DAG: %[[CLONE2_DECL:.*]]:2 = hlfir.declare %[[CLONE2]] {uniq_name = "_QFmult_lastprivate_int2Earg2"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1", pinned, {{.*}}}
+!CHECK-DAG: %[[CLONE1_DECL:.*]]:2 = hlfir.declare %[[CLONE1]] {uniq_name = "_QFmult_lastprivate_int2Earg1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: omp.wsloop {
+!CHECK-NEXT: omp.loop_nest (%[[INDX_WS:.*]]) : {{.*}} {
 
 !Testing last iteration check
 !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32
@@ -182,11 +187,16 @@ subroutine mult_lastprivate_int2(arg1, arg2)
 !CHECK:    %[[ARG2_DECL:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %{{[0-9]+}} {uniq_name = "_QFfirstpriv_lastpriv_intEarg2"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: omp.parallel  {
 ! Firstprivate update
-!CHECK-NOT: omp.barrier
-!CHECK: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[CLONE1:.*]], @{{.*}} %{{.*}}#0 -> %[[CLONE2:.*]], @{{.*}} %{{.*}}#0 -> %{{.*}} : !fir.ref<i32>, !fir.ref<i32>, !{{.*}}) {
-!CHECK-NEXT: omp.loop_nest (%[[INDX_WS:.*]]) : {{.*}} {
+!CHECK: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1", pinned, {{.*}}}
 !CHECK: %[[CLONE1_DECL:.*]]:2 = hlfir.declare %[[CLONE1]] {uniq_name = "_QFfirstpriv_lastpriv_intEarg1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[FPV_LD:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[FPV_LD]] to %[[CLONE1_DECL]]#0 : i32, !fir.ref<i32>
+! Lastprivate Allocation
+!CHECK: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2", pinned, {{.*}}}
 !CHECK: %[[CLONE2_DECL:.*]]:2 = hlfir.declare %[[CLONE2]] {uniq_name = "_QFfirstpriv_lastpriv_intEarg2"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK-NOT: omp.barrier
+!CHECK: omp.wsloop {
+!CHECK-NEXT: omp.loop_nest (%[[INDX_WS:.*]]) : {{.*}} {
 
 ! Testing last iteration check
 !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32
@@ -224,14 +234,17 @@ subroutine firstpriv_lastpriv_int(arg1, arg2)
 !CHECK: omp.parallel  {
 
 ! Firstprivate update
+!CHECK: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1", pinned, {{.*}}}
+!CHECK: %[[CLONE1_DECL:.*]]:2 = hlfir.declare %[[CLONE1]] {uniq_name = "_QFfirstpriv_lastpriv_int2Earg1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK-NEXT: %[[FPV_LD:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref<i32>
+!CHECK-NEXT: hlfir.assign %[[FPV_LD]] to %[[CLONE1_DECL]]#0 : i32, !fir.ref<i32>
 
+!CHECK-NEXT: %[[IV:.*]] = fir.alloca i32 {bindc_name = "n", pinned, {{.*}}}
+!CHECK-NEXT: hlfir.declare %[[IV]]
 
 !CHECK-NEXT: omp.barrier
-!CHECK: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[CLONE1:.*]], @{{.*}} %{{.*}}#0 -> %[[IV:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
+!CHECK: omp.wsloop {
 !CHECK-NEXT: omp.loop_nest (%[[INDX_WS:.*]]) : {{.*}} {
-!CHECK: %[[CLONE1_DECL:.*]]:2 = hlfir.declare %[[CLONE1]] {uniq_name = "_QFfirstpriv_lastpriv_int2Earg1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-
-!CHECK-NEXT: hlfir.declare %[[IV]]
 ! Testing last iteration check
 !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32
 !CHECK: %[[C0:.*]] = arith.constant 0 : i32
diff --git a/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 b/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90
index e8ac8e7f62122..99323e69113bc 100644
--- a/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90
+++ b/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90
@@ -56,15 +56,17 @@
 ! CHECK-DAG:         %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_private_fixEx"}
 ! CHECK-DAG:         %[[X_DECL:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFmultiple_private_fixEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:         omp.parallel {
-
+! CHECK-DAG:           %[[PRIV_I:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK-DAG:           %[[PRIV_I_DECL:.*]]:2 = hlfir.declare %[[PRIV_I]] {uniq_name = "_QFmultiple_private_fixEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK-DAG:           %[[PRIV_J:.*]] = fir.alloca i32 {bindc_name = "j", pinned, uniq_name = "_QFmultiple_private_fixEj"}
+! CHECK-DAG:           %[[PRIV_J_DECL:.*]]:2 = hlfir.declare %[[PRIV_J]] {uniq_name = "_QFmultiple_private_fixEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK-DAG:           %[[PRIV_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, {{.*}}}
+! CHECK-DAG:           %[[PRIV_X_DECL:.*]]:2 = hlfir.declare %[[PRIV_X]] {uniq_name = "_QFmultiple_private_fixEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:           %[[ONE:.*]] = arith.constant 1 : i32
 ! CHECK:           %[[VAL_3:.*]] = fir.load %[[GAMA_DECL]]#0 : !fir.ref<i32>
 ! CHECK:           %[[VAL_5:.*]] = arith.constant 1 : i32
-! CHECK:           omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[PRIV_J:.*]], @{{.*}} %{{.*}}#0 -> %[[PRIV_X:.*]], @{{.*}} %{{.*}}#0 -> %[[PRIV_I:.*]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
+! CHECK:           omp.wsloop {
 ! CHECK-NEXT:        omp.loop_nest (%[[VAL_6:.*]]) : i32 = (%[[ONE]]) to (%[[VAL_3]]) inclusive step (%[[VAL_5]]) {
-! CHECK-DAG:           %[[PRIV_I_DECL:.*]]:2 = hlfir.declare %[[PRIV_I]] {uniq_name = "_QFmultiple_private_fixEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK-DAG:           %[[PRIV_J_DECL:.*]]:2 = hlfir.declare %[[PRIV_J]] {uniq_name = "_QFmultiple_private_fixEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK-DAG:           %[[PRIV_X_DECL:.*]]:2 = hlfir.declare %[[PRIV_X]] {uniq_name = "_QFmultiple_private_fixEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:               fir.store %[[VAL_6]] to %[[PRIV_I_DECL]]#1 : !fir.ref<i32>
 ! CHECK:               %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:               %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index
diff --git a/flang/test/Lower/OpenMP/parallel-private-clause.f90 b/flang/test/Lower/OpenMP/parallel-private-clause.f90
index f26b97b55d51a..7114314df05d3 100644
--- a/flang/test/Lower/OpenMP/parallel-private-clause.f90
+++ b/flang/test/Lower/OpenMP/parallel-private-clause.f90
@@ -271,16 +271,17 @@ subroutine simple_loop_1
   !$OMP PARALLEL PRIVATE(r)
   ! FIRDialect:      %[[R_DECL:.*]]:2 = hlfir.declare %[[R]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsimple_loop_1Er"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
 
+  ! FIRDialect:      %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned, {{.*}}}
+  ! FIRDialect:      %[[ALLOCA_IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loop_1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 
   ! FIRDialect:      %[[WS_LB:.*]] = arith.constant 1 : i32
   ! FIRDialect:      %[[WS_UB:.*]] = arith.constant 9 : i32
   ! FIRDialect:      %[[WS_STEP:.*]] = arith.constant 1 : i32
 
-  ! FIRDialect:      omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[ALLOCA_IV:.*]] : !fir.ref<i32>) {
+  ! FIRDialect:      omp.wsloop {
   ! FIRDialect-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP DO
   do i=1, 9
-  ! FIRDialect:      %[[ALLOCA_IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loop_1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
   ! FIRDialect:      fir.store %[[I]] to %[[ALLOCA_IV_DECL]]#1 : !fir.ref<i32>
   ! FIRDialect:      %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV_DECL]]#0 : !fir.ref<i32>
   ! FIRDialect:      fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}} : (!fir.ref<i8>, i32) -> i1
@@ -298,23 +299,34 @@ subroutine simple_loop_2
   real, allocatable :: r;
   ! FIRDialect:  omp.parallel
   !$OMP PARALLEL
+  ! FIRDialect:      [[R:%.*]] = fir.alloca !fir.box<!fir.heap<f32>> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"}
+  ! FIRDialect:      fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      %[[R_DECL:.*]]:2 = hlfir.declare [[R]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "{{.*}}Er"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
+
+  ! FIRDialect:      %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned, {{.*}}}
+  ! FIRDialect:      %[[ALLOCA_IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "{{.*}}Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 
   ! FIRDialect:      %[[WS_LB:.*]] = arith.constant 1 : i32
   ! FIRDialect:      %[[WS_UB:.*]] = arith.constant 9 : i32
   ! FIRDialect:      %[[WS_STEP:.*]] = arith.constant 1 : i32
 
-  ! FIRDialect:      omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[R:.*]], @{{.*}} %{{.*}}#0 -> %[[ALLOCA_IV:.*]] : !fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<i32>) {
+  ! FIRDialect:      omp.wsloop {
   ! FIRDialect-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP DO PRIVATE(r)
   do i=1, 9
-  ! FIRDialect:     %[[R_DECL:.*]]:2 = hlfir.declare %[[R]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "{{.*}}Er"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
-  ! FIRDialect:     %[[ALLOCA_IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "{{.*}}Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
   ! FIRDialect:     fir.store %[[I]] to %[[ALLOCA_IV_DECL]]#1 : !fir.ref<i32>
   ! FIRDialect:     %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV_DECL]]#0 : !fir.ref<i32>
   ! FIRDialect:     fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
   ! FIRDialect:     omp.yield
+  ! FIRDialect:     {{%.*}} = fir.load %[[R_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:     fir.if {{%.*}} {
+  ! FIRDialect:     [[LD:%.*]] = fir.load %[[R_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:     [[AD:%.*]] = fir.box_addr [[LD]] : (!fir.box<!fir.heap<f32>>) -> !fir.heap<f32>
+  ! FIRDialect:     fir.freemem [[AD]] : !fir.heap<f32>
+  ! FIRDialect:     fir.store {{%.*}} to %[[R_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<f32>>>
   !$OMP END DO
   ! FIRDialect:  omp.terminator
   !$OMP END PARALLEL
@@ -325,24 +337,35 @@ subroutine simple_loop_3
   integer :: i
   real, allocatable :: r;
   ! FIRDialect:  omp.parallel
+
+  ! FIRDialect:      [[R:%.*]] = fir.alloca !fir.box<!fir.heap<f32>> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"}
+  ! FIRDialect:      fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      [[R_DECL:%.*]]:2 = hlfir.declare [[R]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "{{.*}}Er"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
+
+  ! FIRDialect:      %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned, {{.*}}}
+  ! FIRDialect:      %[[ALLOCA_IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "{{.*}}Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
   ! FIRDialect:      %[[WS_LB:.*]] = arith.constant 1 : i32
   ! FIRDialect:      %[[WS_UB:.*]] = arith.constant 9 : i32
   ! FIRDialect:      %[[WS_STEP:.*]] = arith.constant 1 : i32
 
-  ! FIRDialect:      omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[R:.*]], @{{.*}} %{{.*}}#0 -> %[[ALLOCA_IV:.*]] : !fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<i32>) {
+  ! FIRDialect:      omp.wsloop {
   ! FIRDialect-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP PARALLEL DO PRIVATE(r)
   do i=1, 9
-  ! FIRDialect:      [[R_DECL:%.*]]:2 = hlfir.declare %[[R]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "{{.*}}Er"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
-
-  ! FIRDialect:      %[[ALLOCA_IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "{{.*}}Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-
   ! FIRDialect:      fir.store %[[I]] to %[[ALLOCA_IV_DECL:.*]]#1 : !fir.ref<i32>
   ! FIRDialect:      %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV_DECL]]#0 : !fir.ref<i32>
   ! FIRDialect:      fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
   ! FIRDialect:      omp.yield
+  ! FIRDialect:      {{%.*}} = fir.load [[R_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      fir.if {{%.*}} {
+  ! FIRDialect:      [[LD:%.*]] = fir.load [[R_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      [[AD:%.*]] = fir.box_addr [[LD]] : (!fir.box<!fir.heap<f32>>) -> !fir.heap<f32>
+  ! FIRDialect:      fir.freemem [[AD]] : !fir.heap<f32>
+  ! FIRDialect:      fir.store {{%.*}} to [[R_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<f32>>>
   !$OMP END PARALLEL DO
   ! FIRDialect:  omp.terminator
 end subroutine
diff --git a/flang/test/Lower/OpenMP/parallel-reduction-allocatable-array.f90 b/flang/test/Lower/OpenMP/parallel-reduction-allocatable-array.f90
index 11d5682209676..dabd495d733b5 100644
--- a/flang/test/Lower/OpenMP/parallel-reduction-allocatable-array.f90
+++ b/flang/test/Lower/OpenMP/parallel-reduction-allocatable-array.f90
@@ -92,12 +92,13 @@ program reduce
 ! CHECK:           %[[VAL_11:.*]] = fir.embox %[[VAL_9]](%[[VAL_10]]) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
 ! CHECK:           fir.store %[[VAL_11]] to %[[VAL_3]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_12:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 0 : i32
 ! CHECK:             %[[VAL_15:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_16:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) reduction(byref @add_reduction_byref_box_heap_Uxi32 %[[VAL_3]]#0 -> %[[VAL_17:.*]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
+! CHECK:             omp.wsloop reduction(byref @add_reduction_byref_box_heap_Uxi32 %[[VAL_3]]#0 -> %[[VAL_17:.*]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_18:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) {
-! CHECK:                 %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_17]] {fortran_attrs = {{.*}}<allocatable>, uniq_name = "_QFEr"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
 ! CHECK:                 fir.store %[[VAL_18]] to %[[VAL_13]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_20:.*]] = fir.load %[[VAL_13]]#0 : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/parallel-reduction-pointer-array.f90 b/flang/test/Lower/OpenMP/parallel-reduction-pointer-array.f90
index 54fe53b5d6f6a..1e07018a68877 100644
--- a/flang/test/Lower/OpenMP/parallel-reduction-pointer-array.f90
+++ b/flang/test/Lower/OpenMP/parallel-reduction-pointer-array.f90
@@ -104,12 +104,13 @@ program reduce
 ! CHECK:           %[[VAL_20:.*]] = fir.convert %[[VAL_6]] : (!fir.ref<!fir.char<{{.*}}>>) -> !fir.ref<i8>
 ! CHECK:           %[[VAL_21:.*]] = fir.call @_FortranAPointerAllocate(%[[VAL_19]], %[[VAL_4]], %[[VAL_5]], %[[VAL_20]], %[[VAL_7]]) fastmath<contract> : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_22:.*]] = fir.alloca i32 {bindc_name = "i", pinned, uniq_name = "_QFEi"}
+! CHECK:             %[[VAL_23:.*]]:2 = hlfir.declare %[[VAL_22]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_24:.*]] = arith.constant 0 : i32
 ! CHECK:             %[[VAL_25:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_26:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_22:.*]] : !fir.ref<i32>) reduction(byref @add_reduction_byref_box_ptr_Uxi32 %[[VAL_3]]#0 -> %[[VAL_27:.*]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) {
+! CHECK:             omp.wsloop reduction(byref @add_reduction_byref_box_ptr_Uxi32 %[[VAL_3]]#0 -> %[[VAL_27:.*]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) {
 ! CHECK:               omp.loop_nest (%[[VAL_28:.*]]) : i32 = (%[[VAL_24]]) to (%[[VAL_25]]) inclusive step (%[[VAL_26]]) {
-! CHECK:                 %[[VAL_23:.*]]:2 = hlfir.declare %[[VAL_22]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_27]] {fortran_attrs = {{.*}}<pointer>, uniq_name = "_QFEr"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>)
 ! CHECK:                 fir.store %[[VAL_28]] to %[[VAL_23]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_30:.*]] = fir.load %[[VAL_23]]#0 : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/parallel-reduction3.f90 b/flang/test/Lower/OpenMP/parallel-reduction3.f90
index 194b3fdd98201..bb45206e36ee5 100644
--- a/flang/test/Lower/OpenMP/parallel-reduction3.f90
+++ b/flang/test/Lower/OpenMP/parallel-reduction3.f90
@@ -71,12 +71,13 @@
 ! CHECK:           omp.parallel {
 ! CHECK:             %[[VAL_14:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
 ! CHECK:             fir.store %[[VAL_12]]#0 to %[[VAL_14]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
+! CHECK:             %[[VAL_15:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_16:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_17:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_18:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_19:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_15:.*]] : !fir.ref<i32>) reduction(byref @add_reduction_byref_box_Uxi32 %[[VAL_14]] -> %[[VAL_20:.*]] : !fir.ref<!fir.box<!fir.array<?xi32>>>) {
+! CHECK:             omp.wsloop reduction(byref @add_reduction_byref_box_Uxi32 %[[VAL_14]] -> %[[VAL_20:.*]] : !fir.ref<!fir.box<!fir.array<?xi32>>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_21:.*]]) : i32 = (%[[VAL_17]]) to (%[[VAL_18]]) inclusive step (%[[VAL_19]]) {
-! CHECK:                 %[[VAL_16:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFsEc"} : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> (!fir.ref<!fir.box<!fir.array<?xi32>>>, !fir.ref<!fir.box<!fir.array<?xi32>>>)
 ! CHECK:                 fir.store %[[VAL_21]] to %[[VAL_16]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_23:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<!fir.box<!fir.array<?xi32>>>
diff --git a/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90 b/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90
index a0cdaaa4c7b09..4e7c2c15df743 100644
--- a/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90
+++ b/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90
@@ -11,13 +11,20 @@ subroutine omp_do_firstprivate(a)
   n = a+1
   !$omp parallel do firstprivate(a)
   ! CHECK:  omp.parallel {
+
+  ! CHECK: %[[A_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "a", pinned, uniq_name = "_QFomp_do_firstprivateEa"}
+  ! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_firstprivateEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  ! CHECK-NEXT: %[[LD:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref<i32>
+  ! CHECK-NEXT: hlfir.assign %[[LD]] to %[[A_PVT_DECL]]#0 : i32, !fir.ref<i32>
+
+  ! CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+  ! CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_firstprivateEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
   ! CHECK: %[[LB:.*]] = arith.constant 1 : i32
-  ! CHECK-NEXT: %[[UB:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref<i32>
+  ! CHECK-NEXT: %[[UB:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref<i32>
   ! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32
-  ! CHECK-NEXT: omp.wsloop private(@{{.*a_firstprivate_ref_i32.*}} %{{.*}}#0 -> %[[A_PVT_REF:.*]], @{{.*i_private_ref_i32.*}} %{{.*}}#0 -> %[[I_PVT_REF:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
+  ! CHECK-NEXT: omp.wsloop {
   ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
-  ! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_firstprivateEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  ! CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_firstprivateEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
   ! CHECK-NEXT: fir.store %[[ARG1]] to %[[I_PVT_DECL]]#1 : !fir.ref<i32>
   ! CHECK-NEXT: fir.call @_QPfoo(%[[I_PVT_DECL]]#1, %[[A_PVT_DECL]]#1) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
   ! CHECK-NEXT: omp.yield
@@ -40,14 +47,25 @@ subroutine omp_do_firstprivate2(a, n)
   n = a+1
   !$omp parallel do firstprivate(a, n)
   ! CHECK:  omp.parallel {
-  ! CHECK: %[[LB:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref<i32>
-  ! CHECK: %[[UB:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref<i32>
-  ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32
-  ! CHECK: omp.wsloop private(@{{.*a_firstprivate_ref_i32}} %{{.*}}#0 -> %[[A_PVT_REF:.*]], @{{.*n_firstprivate_ref_i32}} %{{.*}}#0 -> %[[N_PVT_REF:.*]], @{{.*i_private_ref_i32}} %{{.*}}#0 -> %[[I_PVT_REF:.*]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
-  ! CHECK-NEXT: omp.loop_nest (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
+
+  ! CHECK: %[[A_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "a", pinned, {{.*}}}
   ! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_firstprivate2Ea"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  ! CHECK: %[[LD:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref<i32>
+  ! CHECK: hlfir.assign %[[LD]] to %[[A_PVT_DECL]]#0 : i32, !fir.ref<i32>
+
+  ! CHECK: %[[N_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "n", pinned, uniq_name = "_QFomp_do_firstprivate2En"}
   ! CHECK: %[[N_PVT_DECL:.*]]:2 = hlfir.declare %[[N_PVT_REF]] {uniq_name = "_QFomp_do_firstprivate2En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  ! CHECK: %[[LD1:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref<i32>
+  ! CHECK: hlfir.assign %[[LD1]] to %[[N_PVT_DECL]]#0 : i32, !fir.ref<i32>
+
+  ! CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
   ! CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_firstprivate2Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
+  ! CHECK: %[[LB:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref<i32>
+  ! CHECK: %[[UB:.*]] = fir.load %[[N_PVT_DECL]]#0 : !fir.ref<i32>
+  ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32
+  ! CHECK: omp.wsloop {
+  ! CHECK-NEXT: omp.loop_nest (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
   ! CHECK: fir.store %[[ARG2]] to %[[I_PVT_DECL]]#1 : !fir.ref<i32>
   ! CHECK: fir.call @_QPfoo(%[[I_PVT_DECL]]#1, %[[A_PVT_DECL]]#1) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
   ! CHECK: omp.yield
diff --git a/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90 b/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90
index a7c0dc3b1b406..dbde5291c01c8 100644
--- a/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90
+++ b/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90
@@ -12,15 +12,17 @@ subroutine omp_do_lastprivate(a)
   !$omp parallel do lastprivate(a)
   ! CHECK:  omp.parallel {
 
+  ! CHECK: %[[A_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "a", pinned, uniq_name = "_QFomp_do_lastprivateEa"}
+  ! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_lastprivateEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
+  ! CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+  ! CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_lastprivateEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 
   ! CHECK: %[[LB:.*]] = arith.constant 1 : i32
-  ! CHECK-NEXT: %[[UB:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref<i32>
+  ! CHECK-NEXT: %[[UB:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref<i32>
   ! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32
-  ! CHECK-NEXT: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[A_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[I_PVT_REF:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
+  ! CHECK-NEXT: omp.wsloop {
   ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
-  ! CHECK:      %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_lastprivateEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  ! CHECK:      %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_lastprivateEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-
   ! CHECK-NEXT: fir.store %[[ARG1]] to %[[I_PVT_DECL]]#1 : !fir.ref<i32>
   ! CHECK-NEXT: fir.call @_QPfoo(%[[I_PVT_DECL]]#1, %[[A_PVT_DECL]]#1) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
   ! CHECK:      %[[NEXT_ARG1:.*]] = arith.addi %[[ARG1]], %[[STEP]] : i32
@@ -56,15 +58,20 @@ subroutine omp_do_lastprivate2(a, n)
   !$omp parallel do lastprivate(a, n)
   ! CHECK:  omp.parallel {
 
-  ! CHECK: %[[LB:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref<i32>
-  ! CHECK: %[[UB:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref<i32>
-  ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32
-  ! CHECK: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[A_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[N_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[I_PVT_REF:.*]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
-  ! CHECK-NEXT: omp.loop_nest (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
+  ! CHECK: %[[A_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "a", pinned, {{.*}}}
   ! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate2Ea"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
+  ! CHECK: %[[N_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "n", pinned, uniq_name = "_QFomp_do_lastprivate2En"}
   ! CHECK: %[[N_PVT_DECL:.*]]:2 = hlfir.declare %[[N_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate2En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
+  ! CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
   ! CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate2Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 
+  ! CHECK: %[[LB:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref<i32>
+  ! CHECK: %[[UB:.*]] = fir.load %[[N_PVT_DECL]]#0 : !fir.ref<i32>
+  ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32
+  ! CHECK: omp.wsloop {
+  ! CHECK-NEXT: omp.loop_nest (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
   ! CHECK: fir.store %[[ARG2]] to %[[I_PVT_DECL]]#1 : !fir.ref<i32>
   ! CHECK: fir.call @_QPfoo(%[[I_PVT_DECL]]#1, %[[A_PVT_DECL]]#1) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
   ! CHECK: %[[NEXT_ARG2:.*]] = arith.addi %[[ARG2]], %[[STEP]] : i32
@@ -97,18 +104,23 @@ subroutine omp_do_lastprivate_collapse2(a)
   !$omp parallel do lastprivate(a) collapse(2)
   ! CHECK:  omp.parallel {
 
+  ! CHECK: %[[A_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "a", pinned, uniq_name = "_QFomp_do_lastprivate_collapse2Ea"}
+  ! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ea"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
+  ! CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+  ! CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  !
+  ! CHECK: %[[J_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "j", pinned, {{.*}}}
+  ! CHECK: %[[J_PVT_DECL:.*]]:2 = hlfir.declare %[[J_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
   ! CHECK: %[[LB1:.*]] = arith.constant 1 : i32
-  ! CHECK-NEXT: %[[UB1:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref<i32>
+  ! CHECK-NEXT: %[[UB1:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref<i32>
   ! CHECK-NEXT: %[[STEP1:.*]] = arith.constant 1 : i32
   ! CHECK: %[[LB2:.*]] = arith.constant 1 : i32
-  ! CHECK-NEXT: %[[UB2:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref<i32>
+  ! CHECK-NEXT: %[[UB2:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref<i32>
   ! CHECK-NEXT: %[[STEP2:.*]] = arith.constant 1 : i32
-  ! CHECK-NEXT: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[A_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[I_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[J_PVT_REF:.*]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
+  ! CHECK-NEXT: omp.wsloop {
   ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[LB1]], %[[LB2]]) to (%[[UB1]], %[[UB2]]) inclusive step (%[[STEP1]], %[[STEP2]]) {
-  ! CHECK:      %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ea"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  ! CHECK:      %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  ! CHECK:      %[[J_PVT_DECL:.*]]:2 = hlfir.declare %[[J_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-
   ! CHECK-NEXT: fir.store %[[ARG1]] to %[[I_PVT_DECL]]#1 : !fir.ref<i32>
   ! CHECK-NEXT: fir.store %[[ARG2]] to %[[J_PVT_DECL]]#1 : !fir.ref<i32>
   ! CHECK-NEXT: fir.call @_QPfoo(%[[I_PVT_DECL]]#1, %[[A_PVT_DECL]]#1) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
@@ -152,23 +164,29 @@ subroutine omp_do_lastprivate_collapse3(a)
   !$omp parallel do lastprivate(a) collapse(3)
   ! CHECK:  omp.parallel {
 
+  ! CHECK: %[[A_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "a", pinned, uniq_name = "_QFomp_do_lastprivate_collapse3Ea"}
+  ! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ea"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
+  ! CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+  ! CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
+  ! CHECK: %[[J_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "j", pinned, {{.*}}}
+  ! CHECK: %[[J_PVT_DECL:.*]]:2 = hlfir.declare %[[J_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
+  ! CHECK: %[[K_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "k", pinned, {{.*}}}
+  ! CHECK: %[[K_PVT_DECL:.*]]:2 = hlfir.declare %[[K_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ek"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 
   ! CHECK: %[[LB1:.*]] = arith.constant 1 : i32
-  ! CHECK-NEXT: %[[UB1:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref<i32>
+  ! CHECK-NEXT: %[[UB1:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref<i32>
   ! CHECK-NEXT: %[[STEP1:.*]] = arith.constant 1 : i32
   ! CHECK: %[[LB2:.*]] = arith.constant 1 : i32
-  ! CHECK-NEXT: %[[UB2:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref<i32>
+  ! CHECK-NEXT: %[[UB2:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref<i32>
   ! CHECK-NEXT: %[[STEP2:.*]] = arith.constant 1 : i32
   ! CHECK: %[[LB3:.*]] = arith.constant 1 : i32
-  ! CHECK-NEXT: %[[UB3:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref<i32>
+  ! CHECK-NEXT: %[[UB3:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref<i32>
   ! CHECK-NEXT: %[[STEP3:.*]] = arith.constant 1 : i32
-  ! CHECK-NEXT: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[A_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[I_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[J_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[K_PVT_REF:.*]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
+  ! CHECK-NEXT: omp.wsloop {
   ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]], %[[ARG3:.*]]) : i32 = (%[[LB1]], %[[LB2]], %[[LB3]]) to (%[[UB1]], %[[UB2]], %[[UB3]]) inclusive step (%[[STEP1]], %[[STEP2]], %[[STEP3]]) {
-  ! CHECK:      %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ea"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  ! CHECK:      %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  ! CHECK:      %[[J_PVT_DECL:.*]]:2 = hlfir.declare %[[J_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  ! CHECK:      %[[K_PVT_DECL:.*]]:2 = hlfir.declare %[[K_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ek"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-
   ! CHECK-NEXT: fir.store %[[ARG1]] to %[[I_PVT_DECL]]#1 : !fir.ref<i32>
   ! CHECK-NEXT: fir.store %[[ARG2]] to %[[J_PVT_DECL]]#1 : !fir.ref<i32>
   ! CHECK-NEXT: fir.store %[[ARG3]] to %[[K_PVT_DECL]]#1 : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/parallel-wsloop-reduction-byref.f90 b/flang/test/Lower/OpenMP/parallel-wsloop-reduction-byref.f90
index 3f44f292cb6a0..99c521406a777 100644
--- a/flang/test/Lower/OpenMP/parallel-wsloop-reduction-byref.f90
+++ b/flang/test/Lower/OpenMP/parallel-wsloop-reduction-byref.f90
@@ -4,7 +4,7 @@
 ! RUN: flang -fc1 -fopenmp -mmlir --force-byref-reduction -emit-hlfir %s -o - | FileCheck %s
 
 ! CHECK: omp.parallel {
-! CHECK: omp.wsloop private({{.*}}) reduction(byref @add_reduction_byref_i32
+! CHECK: omp.wsloop reduction(byref @add_reduction_byref_i32
 subroutine sb
   integer :: x
   x = 0
diff --git a/flang/test/Lower/OpenMP/parallel-wsloop-reduction.f90 b/flang/test/Lower/OpenMP/parallel-wsloop-reduction.f90
index a206eef52da5a..cfeb5de83f4e8 100644
--- a/flang/test/Lower/OpenMP/parallel-wsloop-reduction.f90
+++ b/flang/test/Lower/OpenMP/parallel-wsloop-reduction.f90
@@ -4,7 +4,7 @@
 ! RUN: flang -fc1 -fopenmp -emit-hlfir %s -o - | FileCheck %s
 
 ! CHECK: omp.parallel {
-! CHECK: omp.wsloop private({{.*}}) reduction(@add_reduction_i32
+! CHECK: omp.wsloop reduction(@add_reduction_i32
 subroutine sb
   integer :: x
   x = 0
diff --git a/flang/test/Lower/OpenMP/parallel-wsloop.f90 b/flang/test/Lower/OpenMP/parallel-wsloop.f90
index 7116069e8daa6..cba5209f85989 100644
--- a/flang/test/Lower/OpenMP/parallel-wsloop.f90
+++ b/flang/test/Lower/OpenMP/parallel-wsloop.f90
@@ -10,7 +10,7 @@ subroutine simple_parallel_do
   ! CHECK:      %[[WS_LB:.*]] = arith.constant 1 : i32
   ! CHECK:      %[[WS_UB:.*]] = arith.constant 9 : i32
   ! CHECK:      %[[WS_STEP:.*]] = arith.constant 1 : i32
-  ! CHECK:      omp.wsloop private({{.*}}) {
+  ! CHECK:      omp.wsloop {
   ! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP PARALLEL DO
   do i=1, 9
@@ -39,7 +39,7 @@ subroutine parallel_do_with_parallel_clauses(cond, nt)
   ! CHECK:      %[[WS_LB:.*]] = arith.constant 1 : i32
   ! CHECK:      %[[WS_UB:.*]] = arith.constant 9 : i32
   ! CHECK:      %[[WS_STEP:.*]] = arith.constant 1 : i32
-  ! CHECK:      omp.wsloop private({{.*}}) {
+  ! CHECK:      omp.wsloop {
   ! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP PARALLEL DO IF(cond) NUM_THREADS(nt) PROC_BIND(close)
   do i=1, 9
@@ -64,7 +64,7 @@ subroutine parallel_do_with_clauses(nt)
   ! CHECK:      %[[WS_LB:.*]] = arith.constant 1 : i32
   ! CHECK:      %[[WS_UB:.*]] = arith.constant 9 : i32
   ! CHECK:      %[[WS_STEP:.*]] = arith.constant 1 : i32
-  ! CHECK:      omp.wsloop schedule(dynamic) private({{.*}}) {
+  ! CHECK:      omp.wsloop schedule(dynamic) {
   ! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP PARALLEL DO NUM_THREADS(nt) SCHEDULE(dynamic)
   do i=1, 9
@@ -92,16 +92,19 @@ subroutine parallel_do_with_privatisation_clauses(cond,nt)
   integer :: nt
   integer :: i
   ! CHECK:  omp.parallel
-
+  ! CHECK:      %[[PRIVATE_COND_REF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEcond"}
+  ! CHECK:      %[[PRIVATE_COND_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_COND_REF]] {uniq_name = "_QFparallel_do_with_privatisation_clausesEcond"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+  ! CHECK:      %[[PRIVATE_NT_REF:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEnt"}
+  ! CHECK:      %[[PRIVATE_NT_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_NT_REF]] {uniq_name = "_QFparallel_do_with_privatisation_clausesEnt"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  ! CHECK:      %[[NT_VAL:.*]] = fir.load %[[NT_DECL]]#0 : !fir.ref<i32>
+  ! CHECK:      hlfir.assign %[[NT_VAL]] to %[[PRIVATE_NT_DECL]]#0 : i32, !fir.ref<i32>
   ! CHECK:      %[[WS_LB:.*]] = arith.constant 1 : i32
   ! CHECK:      %[[WS_UB:.*]] = arith.constant 9 : i32
   ! CHECK:      %[[WS_STEP:.*]] = arith.constant 1 : i32
-  ! CHECK:      omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[PRIVATE_COND_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[PRIVATE_NT_REF:.*]], @{{.*}} %3#0 -> %{{.*}} : !fir.ref<!fir.logical<4>>, !fir.ref<i32>, !fir.ref<i32>) {
+  ! CHECK:      omp.wsloop {
   ! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP PARALLEL DO PRIVATE(cond) FIRSTPRIVATE(nt)
   do i=1, 9
-  ! CHECK:      %[[PRIVATE_COND_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_COND_REF]] {uniq_name = "_QFparallel_do_with_privatisation_clausesEcond"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-  ! CHECK:      %[[PRIVATE_NT_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_NT_REF]] {uniq_name = "_QFparallel_do_with_privatisation_clausesEnt"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
   ! CHECK:      fir.store %[[I]] to %[[IV_ADDR:.*]]#1 : !fir.ref<i32>
   ! CHECK:      %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]]#0 : !fir.ref<i32>
   ! CHECK:      fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
@@ -146,12 +149,14 @@ end subroutine parallel_private_do
 
 ! CHECK:             %[[NT_PRIV_DECL:.*]]:2 = hlfir.declare %[[NT_PRIV_ADDR]] {uniq_name = "_QFparallel_private_doEnt"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 
+! CHECK:             %[[I_PRIV:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[I_PRIV_DECL:.*]]:2 = hlfir.declare %[[I_PRIV]] {uniq_name = "_QFparallel_private_doEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 9 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[I_PRIV:.*]] : !fir.ref<i32>) {
+! CHECK:             omp.wsloop {
 ! CHECK-NEXT:          omp.loop_nest (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:                 %[[I_PRIV_DECL:.*]]:2 = hlfir.declare %[[I_PRIV]] {uniq_name = "_QFparallel_private_doEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref<i32>
 ! CHECK:                 fir.call @_QPfoo(%[[I_PRIV_DECL]]#1, %[[COND_DECL]]#1, %[[NT_PRIV_DECL]]#1) {{.*}}: (!fir.ref<i32>, !fir.ref<!fir.logical<4>>, !fir.ref<i32>) -> ()
 ! CHECK:                 omp.yield
@@ -190,13 +195,14 @@ end subroutine omp_parallel_multiple_firstprivate_do
 
 ! CHECK:             %[[B_PRIV_DECL:.*]]:2 = hlfir.declare %[[B_PRIV_ADDR]] {uniq_name = "_QFomp_parallel_multiple_firstprivate_doEb"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 
+! CHECK:             %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[I_PRIV_DECL:.*]]:2 = hlfir.declare %[[I_PRIV_ADDR]] {uniq_name = "_QFomp_parallel_multiple_firstprivate_doEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[I_PRIV_ADDR:.*]] : !fir.ref<i32>) {
+! CHECK:             omp.wsloop {
 ! CHECK-NEXT:          omp.loop_nest (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
-! CHECK:                 %[[I_PRIV_DECL:.*]]:2 = hlfir.declare %[[I_PRIV_ADDR]] {uniq_name = "_QFomp_parallel_multiple_firstprivate_doEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref<i32>
 ! CHECK:                 fir.call @_QPbar(%[[I_PRIV_DECL]]#1, %[[A_PRIV_DECL]]#1) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
 ! CHECK:                 omp.yield
@@ -231,15 +237,23 @@ end subroutine parallel_do_private
 ! CHECK-SAME:                                      %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}) {
 ! CHECK:           %[[NT_DECL:.*]]:2 = hlfir.declare %[[VAL_1]] dummy_scope %{{[0-9]+}} {uniq_name = "_QFparallel_do_privateEnt"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:           omp.parallel   {
+
+! CHECK:             %[[COND_PRIV_ADDR:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_do_privateEcond"}
+! CHECK:             %[[COND_PRIV_DECL:.*]]:2 = hlfir.declare %[[COND_PRIV_ADDR]] {uniq_name = "_QFparallel_do_privateEcond"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+
+! CHECK:             %[[NT_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_do_privateEnt"}
+! CHECK:             %[[NT_PRIV_DECL:.*]]:2 = hlfir.declare %[[NT_PRIV_ADDR]] {uniq_name = "_QFparallel_do_privateEnt"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[NT_VAL:.*]] = fir.load %[[NT_DECL]]#0 : !fir.ref<i32>
+! CHECK:             hlfir.assign %[[NT_VAL]] to %[[NT_PRIV_DECL]]#0 : i32, !fir.ref<i32>
+
+! CHECK:             %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[I_PRIV_DECL:.*]]:2 = hlfir.declare %[[I_PRIV_ADDR]] {uniq_name = "_QFparallel_do_privateEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 9 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[COND_PRIV_ADDR:.*]], @{{.*}} %{{.*}}#0 -> %[[NT_PRIV_ADDR:.*]], @{{.*}} %3#0 -> %[[I_PRIV_ADDR:.*]] : !fir.ref<!fir.logical<4>>, !fir.ref<i32>, !fir.ref<i32>) {
+! CHECK:             omp.wsloop {
 ! CHECK-NEXT:          omp.loop_nest (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:                 %[[COND_PRIV_DECL:.*]]:2 = hlfir.declare %[[COND_PRIV_ADDR]] {uniq_name = "_QFparallel_do_privateEcond"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:                 %[[NT_PRIV_DECL:.*]]:2 = hlfir.declare %[[NT_PRIV_ADDR]] {uniq_name = "_QFparallel_do_privateEnt"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:                 %[[I_PRIV_DECL:.*]]:2 = hlfir.declare %[[I_PRIV_ADDR]] {uniq_name = "_QFparallel_do_privateEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-
 ! CHECK:                 fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref<i32>
 ! CHECK:                 fir.call @_QPfoo(%[[I_PRIV_DECL]]#1, %[[COND_PRIV_DECL]]#1, %[[NT_PRIV_DECL]]#1) {{.*}}: (!fir.ref<i32>, !fir.ref<!fir.logical<4>>, !fir.ref<i32>) -> ()
 ! CHECK:                 omp.yield
@@ -273,15 +287,25 @@ end subroutine omp_parallel_do_multiple_firstprivate
 ! CHECK:           %[[A_DECL:.*]]:2 = hlfir.declare %[[A_ADDR]] dummy_scope %{{[0-9]+}} {uniq_name = "_QFomp_parallel_do_multiple_firstprivateEa"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:           %[[B_DECL:.*]]:2 = hlfir.declare %[[B_ADDR]] dummy_scope %{{[0-9]+}} {uniq_name = "_QFomp_parallel_do_multiple_firstprivateEb"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>
 ! CHECK:           omp.parallel {
+
+! CHECK:             %[[A_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "a", pinned, uniq_name = "_QFomp_parallel_do_multiple_firstprivateEa"}
+! CHECK:             %[[A_PRIV_DECL:.*]]:2 = hlfir.declare %[[A_PRIV_ADDR]] {uniq_name = "_QFomp_parallel_do_multiple_firstprivateEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[A:.*]] = fir.load %[[A_DECL]]#0 : !fir.ref<i32>
+! CHECK:             hlfir.assign %[[A]] to %[[A_PRIV_DECL]]#0 : i32, !fir.ref<i32>
+
+! CHECK:             %[[B_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "b", pinned, uniq_name = "_QFomp_parallel_do_multiple_firstprivateEb"}
+! CHECK:             %[[B_PRIV_DECL:.*]]:2 = hlfir.declare %[[B_PRIV_ADDR]] {uniq_name = "_QFomp_parallel_do_multiple_firstprivateEb"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[B:.*]] = fir.load %[[B_DECL]]#0 : !fir.ref<i32>
+! CHECK:             hlfir.assign %[[B]] to %[[B_PRIV_DECL]]#0 : i32, !fir.ref<i32>
+
+! CHECK:             %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[I_PRIV_DECL:.*]]:2 = hlfir.declare %[[I_PRIV_ADDR]] {uniq_name = "_QFomp_parallel_do_multiple_firstprivateEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[A_PRIV_ADDR:.*]], @{{.*}} %{{.}}#0 -> %[[B_PRIV_ADDR:.*]], @{{.*}} %{{.}}#0 -> %[[I_PRIV_ADDR:.*]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
+! CHECK:             omp.wsloop {
 ! CHECK-NEXT:         omp.loop_nest (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
-! CHECK:                 %[[A_PRIV_DECL:.*]]:2 = hlfir.declare %[[A_PRIV_ADDR]] {uniq_name = "_QFomp_parallel_do_multiple_firstprivateEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:                 %[[B_PRIV_DECL:.*]]:2 = hlfir.declare %[[B_PRIV_ADDR]] {uniq_name = "_QFomp_parallel_do_multiple_firstprivateEb"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:                 %[[I_PRIV_DECL:.*]]:2 = hlfir.declare %[[I_PRIV_ADDR]] {uniq_name = "_QFomp_parallel_do_multiple_firstprivateEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-
 ! CHECK:                 fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref<i32>
 ! CHECK:                 fir.call @_QPbar(%[[I_PRIV_DECL]]#1, %[[A_PRIV_DECL]]#1) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
 ! CHECK:                 omp.yield
diff --git a/flang/test/Lower/OpenMP/private-derived-type.f90 b/flang/test/Lower/OpenMP/private-derived-type.f90
index 7e0a3f14639f6..df1c7c3f92227 100644
--- a/flang/test/Lower/OpenMP/private-derived-type.f90
+++ b/flang/test/Lower/OpenMP/private-derived-type.f90
@@ -1,5 +1,5 @@
-! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s | FileCheck %s
-! RUN: bbc -emit-hlfir -fopenmp -o - %s | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization-staging=true -o - %s | FileCheck %s
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization-staging=true -o - %s | FileCheck %s
 
 subroutine s4
   type y3
diff --git a/flang/test/Lower/OpenMP/same_var_first_lastprivate.f90 b/flang/test/Lower/OpenMP/same_var_first_lastprivate.f90
index c49a0908b721e..e8e4a0802e00d 100644
--- a/flang/test/Lower/OpenMP/same_var_first_lastprivate.f90
+++ b/flang/test/Lower/OpenMP/same_var_first_lastprivate.f90
@@ -1,4 +1,4 @@
-! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - | FileCheck %s
+! RUN: %flang_fc1 -fopenmp -mmlir --openmp-enable-delayed-privatization-staging=true -emit-hlfir %s -o - | FileCheck %s
 
 subroutine first_and_lastprivate
   integer i
diff --git a/flang/test/Lower/OpenMP/stop-stmt-in-region.f90 b/flang/test/Lower/OpenMP/stop-stmt-in-region.f90
index d817c4e771b31..f174caa7fa4fd 100644
--- a/flang/test/Lower/OpenMP/stop-stmt-in-region.f90
+++ b/flang/test/Lower/OpenMP/stop-stmt-in-region.f90
@@ -80,13 +80,14 @@ subroutine test_stop_in_region3()
 ! CHECK:         %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFtest_stop_in_region4Ex"}
 ! CHECK:         %[[VAL_2_DECL:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFtest_stop_in_region4Ex"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 
+! CHECK:         %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:         %[[VAL_0_DECL:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFtest_stop_in_region4Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 
 ! CHECK:         %[[VAL_3:.*]] = arith.constant 1 : i32
 ! CHECK:         %[[VAL_4:.*]] = arith.constant 10 : i32
 ! CHECK:         %[[VAL_5:.*]] = arith.constant 1 : i32
-! CHECK:         omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_0:.*]] : !fir.ref<i32>) {
+! CHECK:         omp.wsloop {
 ! CHECK-NEXT:      omp.loop_nest (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) {
-! CHECK:             %[[VAL_0_DECL:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFtest_stop_in_region4Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             fir.store %[[VAL_6]] to %[[VAL_0_DECL]]#1 : !fir.ref<i32>
 ! CHECK:             cf.br ^bb1
 ! CHECK:           ^bb1:
diff --git a/flang/test/Lower/OpenMP/target.f90 b/flang/test/Lower/OpenMP/target.f90
index bf801e69405b9..04764be2293c1 100644
--- a/flang/test/Lower/OpenMP/target.f90
+++ b/flang/test/Lower/OpenMP/target.f90
@@ -586,10 +586,11 @@ subroutine omp_target_parallel_do
       !CHECK: %[[VAL_0_DECL:.*]]:2 = hlfir.declare %[[ARG_0]](%{{.*}}) {uniq_name = "_QFomp_target_parallel_doEa"} : (!fir.ref<!fir.array<1024xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<1024xi32>>, !fir.ref<!fir.array<1024xi32>>)
       !CHECK: omp.parallel
       !$omp target parallel do map(tofrom: a)
-         !CHECK: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[I_PVT_ALLOCA:.*]] : !fir.ref<i32>) {
+         !CHECK: %[[I_PVT_ALLOCA:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+         !CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_ALLOCA]] {uniq_name = "_QFomp_target_parallel_doEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+         !CHECK: omp.wsloop {
          !CHECK-NEXT: omp.loop_nest (%[[I_VAL:.*]]) : i32
          do i = 1, 1024
-         !CHECK:     %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_ALLOCA]] {uniq_name = "_QFomp_target_parallel_doEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
            !CHECK:   fir.store %[[I_VAL]] to %[[I_PVT_DECL]]#1 : !fir.ref<i32>
            !CHECK:   %[[C10:.*]] = arith.constant 10 : i32
            !CHECK:   %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/unstructured.f90 b/flang/test/Lower/OpenMP/unstructured.f90
index a9925a484eb1c..a0955c8440c1c 100644
--- a/flang/test/Lower/OpenMP/unstructured.f90
+++ b/flang/test/Lower/OpenMP/unstructured.f90
@@ -69,9 +69,10 @@ subroutine ss2(n) ! unstructured OpenMP construct; loop exit inside construct
 ! CHECK:     cond_br %{{[0-9]*}}, ^bb2, ^bb4
 ! CHECK:   ^bb2:  // pred: ^bb1
 
-! CHECK:     omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[ALLOCA_2:.*]] : !fir.ref<i32>) {
+! CHECK:     %[[ALLOCA_2:.*]] = fir.alloca i32 {{{.*}}, pinned, {{.*}}}
+! CHECK:     %[[OMP_LOOP_K_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_2]] {uniq_name = "_QFss3Ek"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:     omp.wsloop {
 ! CHECK:       omp.loop_nest (%[[ARG1:.*]]) : {{.*}} {
-! CHECK:         %[[OMP_LOOP_K_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_2]] {uniq_name = "_QFss3Ek"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:         fir.store %[[ARG1]] to %[[OMP_LOOP_K_DECL]]#1 : !fir.ref<i32>
 ! CHECK:         @_FortranAioBeginExternalListOutput
 ! CHECK:         %[[LOAD_1:.*]] = fir.load %[[OMP_LOOP_K_DECL]]#0 : !fir.ref<i32>
@@ -80,9 +81,11 @@ subroutine ss2(n) ! unstructured OpenMP construct; loop exit inside construct
 ! CHECK:       }
 ! CHECK:     }
 
-! CHECK:     omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[ALLOCA_1:.*]] : !fir.ref<i32>) {
+! CHECK:     %[[ALLOCA_1:.*]] = fir.alloca i32 {{{.*}}, pinned, {{.*}}}
+! CHECK:     %[[OMP_LOOP_J_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_1]] {uniq_name = "_QFss3Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
+! CHECK:     omp.wsloop {
 ! CHECK:       omp.loop_nest (%[[ARG2:.*]]) : {{.*}} {
-! CHECK:         %[[OMP_LOOP_J_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_1]] {uniq_name = "_QFss3Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:         fir.store %[[ARG2]] to %[[OMP_LOOP_J_DECL]]#1 : !fir.ref<i32>
 ! CHECK:         br ^bb1
 ! CHECK:       ^bb2:  // 2 preds: ^bb1, ^bb5
@@ -125,9 +128,10 @@ subroutine ss3(n) ! nested unstructured OpenMP constructs
 
 ! CHECK-LABEL: func @_QPss4{{.*}} {
 ! CHECK:       omp.parallel private(@{{.*}} %{{.*}}#0 -> %{{.*}} : {{.*}}) {
-! CHECK:         omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[ALLOCA:.*]] : !fir.ref<i32>) {
+! CHECK:         %[[ALLOCA:.*]] = fir.alloca i32 {{{.*}}, pinned, uniq_name = "_QFss4Ej"}
+! CHECK:         %[[OMP_LOOP_J_DECL:.*]]:2 = hlfir.declare %[[ALLOCA]] {uniq_name = "_QFss4Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:         omp.wsloop {
 ! CHECK-NEXT:      omp.loop_nest (%[[ARG:.*]]) : {{.*}} {
-! CHECK:             %[[OMP_LOOP_J_DECL:.*]]:2 = hlfir.declare %[[ALLOCA]] {uniq_name = "_QFss4Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             fir.store %[[ARG]] to %[[OMP_LOOP_J_DECL]]#1 : !fir.ref<i32>
 ! CHECK:             %[[COND:.*]] = arith.cmpi eq, %{{.*}}, %{{.*}}
 ! CHECK:             %[[COND_XOR:.*]] = arith.xori %[[COND]], %{{.*}}
@@ -156,7 +160,7 @@ subroutine ss4(n) ! CYCLE in OpenMP wsloop constructs
 
 ! CHECK-LABEL: func @_QPss5() {
 ! CHECK:  omp.parallel private(@{{.*}} %{{.*}}#0 -> %{{.*}} : {{.*}}) {
-! CHECK:    omp.wsloop private({{.*}}) {
+! CHECK:    omp.wsloop {
 ! CHECK:      omp.loop_nest {{.*}} {
 ! CHECK:        br ^[[BB1:.*]]
 ! CHECK:      ^[[BB1]]:
@@ -198,7 +202,7 @@ subroutine ss5() ! EXIT inside OpenMP wsloop (inside parallel)
 ! CHECK:  ^[[BB1_OUTER]]:
 ! CHECK:    cond_br %{{.*}}, ^[[BB2_OUTER:.*]], ^[[BB3_OUTER:.*]]
 ! CHECK:  ^[[BB2_OUTER]]:
-! CHECK:    omp.wsloop private({{.*}}) {
+! CHECK:    omp.wsloop {
 ! CHECK:      omp.loop_nest {{.*}} {
 ! CHECK:        br ^[[BB1:.*]]
 ! CHECK:      ^[[BB1]]:
@@ -244,7 +248,7 @@ subroutine ss6() ! EXIT inside OpenMP wsloop in a do loop (inside parallel)
 ! CHECK:   cond_br %{{.*}}, ^[[BB2_OUTER:.*]], ^[[BB3_OUTER:.*]]
 ! CHECK-NEXT: ^[[BB2_OUTER:.*]]:
 ! CHECK:   omp.parallel  {
-! CHECK:     omp.wsloop private({{.*}}) {
+! CHECK:     omp.wsloop {
 ! CHECK:       omp.loop_nest {{.*}} {
 ! CHECK:         br ^[[BB1:.*]]
 ! CHECK-NEXT:       ^[[BB1]]:
@@ -284,7 +288,7 @@ subroutine ss7() ! EXIT inside OpenMP parallel do (inside do loop)
 
 ! CHECK-LABEL: func @_QPss8() {
 ! CHECK:  omp.parallel  {
-! CHECK:    omp.wsloop private({{.*}}) {
+! CHECK:    omp.wsloop {
 ! CHECK:      omp.loop_nest {{.*}} {
 ! CHECK:        br ^[[BB1:.*]]
 ! CHECK-NEXT:      ^[[BB1]]:
diff --git a/flang/test/Lower/OpenMP/wsloop-chunks.f90 b/flang/test/Lower/OpenMP/wsloop-chunks.f90
index 0a2b962761acb..0fb7d6f1b64fa 100644
--- a/flang/test/Lower/OpenMP/wsloop-chunks.f90
+++ b/flang/test/Lower/OpenMP/wsloop-chunks.f90
@@ -20,7 +20,7 @@ program wsloop
 ! CHECK:         %[[VAL_3:.*]] = arith.constant 1 : i32
 ! CHECK:         %[[VAL_4:.*]] = arith.constant 9 : i32
 ! CHECK:         %[[VAL_5:.*]] = arith.constant 1 : i32
-! CHECK:         omp.wsloop nowait schedule(static = %[[VAL_2]] : i32) private({{.*}}) {
+! CHECK:         omp.wsloop nowait schedule(static = %[[VAL_2]] : i32) {
 ! CHECK-NEXT:      omp.loop_nest (%[[ARG0:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) {
 ! CHECK:             fir.store %[[ARG0]] to %[[STORE_IV:.*]]#1 : !fir.ref<i32>
 ! CHECK:             %[[LOAD_IV:.*]] = fir.load %[[STORE_IV]]#0 : !fir.ref<i32>
@@ -40,7 +40,7 @@ program wsloop
 ! CHECK:         %[[VAL_15:.*]] = arith.constant 1 : i32
 ! CHECK:         %[[VAL_16:.*]] = arith.constant 9 : i32
 ! CHECK:         %[[VAL_17:.*]] = arith.constant 1 : i32
-! CHECK:         omp.wsloop nowait schedule(static = %[[VAL_14]] : i32) private({{.*}}) {
+! CHECK:         omp.wsloop nowait schedule(static = %[[VAL_14]] : i32) {
 ! CHECK-NEXT:      omp.loop_nest (%[[ARG1:.*]]) : i32 = (%[[VAL_15]]) to (%[[VAL_16]]) inclusive step (%[[VAL_17]]) {
 ! CHECK:             fir.store %[[ARG1]] to %[[STORE_IV1:.*]]#1 : !fir.ref<i32>
 ! CHECK:             %[[VAL_24:.*]] = arith.constant 2 : i32
@@ -66,7 +66,7 @@ program wsloop
 ! CHECK:         %[[VAL_30:.*]] = arith.constant 1 : i32
 ! CHECK:         %[[VAL_31:.*]] = arith.constant 9 : i32
 ! CHECK:         %[[VAL_32:.*]] = arith.constant 1 : i32
-! CHECK:         omp.wsloop nowait schedule(static = %[[VAL_29]] : i32) private({{.*}}) {
+! CHECK:         omp.wsloop nowait schedule(static = %[[VAL_29]] : i32) {
 ! CHECK-NEXT:      omp.loop_nest (%[[ARG2:.*]]) : i32 = (%[[VAL_30]]) to (%[[VAL_31]]) inclusive step (%[[VAL_32]]) {
 ! CHECK:             fir.store %[[ARG2]] to %[[STORE_IV2:.*]]#1 : !fir.ref<i32>
 ! CHECK:             %[[VAL_39:.*]] = arith.constant 3 : i32
diff --git a/flang/test/Lower/OpenMP/wsloop-collapse.f90 b/flang/test/Lower/OpenMP/wsloop-collapse.f90
index 6d9862e625400..61ee76d589107 100644
--- a/flang/test/Lower/OpenMP/wsloop-collapse.f90
+++ b/flang/test/Lower/OpenMP/wsloop-collapse.f90
@@ -38,6 +38,15 @@ program wsloop_collapse
 !CHECK:           %[[VAL_23:.*]] = arith.constant 0 : i32
 !CHECK:           hlfir.assign %[[VAL_23]] to %[[VAL_19]]#0 : i32, !fir.ref<i32>
 
+!CHECK:           %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+!CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
+!CHECK:           %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "j", pinned, {{.*}}}
+!CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
+!CHECK:           %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "k", pinned, {{.*}}}
+!CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFEk"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
   integer :: i, j, k
   integer :: a, b, c
   integer :: x
@@ -56,17 +65,12 @@ program wsloop_collapse
 !CHECK:           %[[VAL_30:.*]] = arith.constant 1 : i32
 !CHECK:           %[[VAL_31:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
 !CHECK:           %[[VAL_32:.*]] = arith.constant 1 : i32
-!CHECK:           omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_4:.*]], @{{.*}} %{{.*}}#0 -> %[[VAL_2:.*]], @{{.*}} %{{.*}}#0 -> %[[VAL_0:.*]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
+!CHECK:           omp.wsloop {
 !CHECK-NEXT:        omp.loop_nest (%[[VAL_33:.*]], %[[VAL_34:.*]], %[[VAL_35:.*]]) : i32 = (%[[VAL_24]], %[[VAL_27]], %[[VAL_30]]) to (%[[VAL_25]], %[[VAL_28]], %[[VAL_31]]) inclusive step (%[[VAL_26]], %[[VAL_29]], %[[VAL_32]]) {
   !$omp do collapse(3)
   do i = 1, a
      do j= 1, b
         do k = 1, c
-
-!CHECK:               %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK:               %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK:               %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFEk"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-
 !CHECK:               fir.store %[[VAL_33]] to %[[VAL_5]]#1 : !fir.ref<i32>
 !CHECK:               fir.store %[[VAL_34]] to %[[VAL_3]]#1 : !fir.ref<i32>
 !CHECK:               fir.store %[[VAL_35]] to %[[VAL_1]]#1 : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/wsloop-monotonic.f90 b/flang/test/Lower/OpenMP/wsloop-monotonic.f90
index e21aa4c678f42..9659fff2d42e7 100644
--- a/flang/test/Lower/OpenMP/wsloop-monotonic.f90
+++ b/flang/test/Lower/OpenMP/wsloop-monotonic.f90
@@ -11,10 +11,11 @@ program wsloop_dynamic
 !CHECK:  omp.parallel {
 
 !$OMP DO SCHEDULE(monotonic:dynamic)
+!CHECK:     %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned, {{.*}}}
 !CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
 !CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
 !CHECK:     %[[WS_STEP:.*]] = arith.constant 1 : i32
-!CHECK:     omp.wsloop nowait schedule(dynamic, monotonic) private({{.*}}) {
+!CHECK:     omp.wsloop nowait schedule(dynamic, monotonic) {
 !CHECK-NEXT:  omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
 !CHECK:         fir.store %[[I]] to %[[ALLOCA_IV:.*]]#1 : !fir.ref<i32>
 
diff --git a/flang/test/Lower/OpenMP/wsloop-nonmonotonic.f90 b/flang/test/Lower/OpenMP/wsloop-nonmonotonic.f90
index 23d3c49c00786..b1bea525ff489 100644
--- a/flang/test/Lower/OpenMP/wsloop-nonmonotonic.f90
+++ b/flang/test/Lower/OpenMP/wsloop-nonmonotonic.f90
@@ -12,12 +12,13 @@ program wsloop_dynamic
 !CHECK:  omp.parallel {
 
 !$OMP DO SCHEDULE(nonmonotonic:dynamic)
+!CHECK:     %[[I_REF:.*]] = fir.alloca i32 {{{.*}}, pinned, {{.*}}}
+!CHECK:     %[[ALLOCA_IV:.*]]:2 = hlfir.declare %[[I_REF]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
 !CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
 !CHECK:     %[[WS_STEP:.*]] = arith.constant 1 : i32
-!CHECK:     omp.wsloop nowait schedule(dynamic, nonmonotonic) private(@{{.*}} %{{.*}}#0 -> %[[I_REF:.*]] : !fir.ref<i32>) {
+!CHECK:     omp.wsloop nowait schedule(dynamic, nonmonotonic) {
 !CHECK-NEXT:  omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
-!CHECK:         %[[ALLOCA_IV:.*]]:2 = hlfir.declare %[[I_REF]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK:         fir.store %[[I]] to %[[ALLOCA_IV]]#1 : !fir.ref<i32>
 
   do i=1, 9
diff --git a/flang/test/Lower/OpenMP/wsloop-ordered.f90 b/flang/test/Lower/OpenMP/wsloop-ordered.f90
index 4862b7296a9bc..5fa53f7b28447 100644
--- a/flang/test/Lower/OpenMP/wsloop-ordered.f90
+++ b/flang/test/Lower/OpenMP/wsloop-ordered.f90
@@ -6,7 +6,7 @@
 subroutine wsloop_ordered_no_para()
   integer :: a(10), i
 
-! CHECK:  omp.wsloop ordered(0) private({{.*}}) {
+! CHECK:  omp.wsloop ordered(0) {
 ! CHECK-NEXT: omp.loop_nest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) {
 ! CHECK:        omp.yield
 ! CHECK:      }
@@ -27,7 +27,7 @@ subroutine wsloop_ordered_with_para()
   integer :: a(10), i
 
 ! CHECK: func @_QPwsloop_ordered_with_para() {
-! CHECK:  omp.wsloop ordered(1) private({{.*}}) {
+! CHECK:  omp.wsloop ordered(1) {
 ! CHECK-NEXT: omp.loop_nest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) {
 ! CHECK:        omp.yield
 ! CHECK:      }
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-add-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-add-byref.f90
index bc021e7a3b273..c38a79191bc4e 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-add-byref.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-add-byref.f90
@@ -85,12 +85,13 @@
 ! CHECK:           %[[VAL_4:.*]] = arith.constant 0 : i32
 ! CHECK:           hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : i32, !fir.ref<i32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_5:.*]] : !fir.ref<i32>) reduction(byref @add_reduction_byref_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) {
+! CHECK:             omp.wsloop reduction(byref @add_reduction_byref_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:                 %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
@@ -126,12 +127,13 @@ subroutine simple_int_reduction
 ! CHECK:           %[[VAL_4:.*]] = arith.constant 0.000000e+00 : f32
 ! CHECK:           hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : f32, !fir.ref<f32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_real_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_5:.*]] : !fir.ref<i32>) reduction(byref @add_reduction_byref_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>) {
+! CHECK:             omp.wsloop reduction(byref @add_reduction_byref_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:                 %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_real_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
 ! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<f32>
@@ -168,12 +170,13 @@ subroutine simple_real_reduction
 ! CHECK:           %[[VAL_4:.*]] = arith.constant 0 : i32
 ! CHECK:           hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : i32, !fir.ref<i32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_5:.*]] : !fir.ref<i32>) reduction(byref @add_reduction_byref_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) {
+! CHECK:             omp.wsloop reduction(byref @add_reduction_byref_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:                 %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
@@ -208,12 +211,13 @@ subroutine simple_int_reduction_switch_order
 ! CHECK:           %[[VAL_4:.*]] = arith.constant 0.000000e+00 : f32
 ! CHECK:           hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : f32, !fir.ref<f32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_real_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_5:.*]] : !fir.ref<i32>) reduction(byref @add_reduction_byref_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>) {
+! CHECK:             omp.wsloop reduction(byref @add_reduction_byref_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:                 %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_real_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
 ! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
@@ -257,12 +261,13 @@ subroutine simple_real_reduction_switch_order
 ! CHECK:           %[[VAL_10:.*]] = arith.constant 0 : i32
 ! CHECK:           hlfir.assign %[[VAL_10]] to %[[VAL_7]]#0 : i32, !fir.ref<i32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_11:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFmultiple_int_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_15:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_11:.*]] : !fir.ref<i32>) reduction(byref @add_reduction_byref_i32 %[[VAL_3]]#0 -> %[[VAL_16:.*]], byref @add_reduction_byref_i32 %[[VAL_5]]#0 -> %[[VAL_17:.*]], byref @add_reduction_byref_i32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
+! CHECK:             omp.wsloop reduction(byref @add_reduction_byref_i32 %[[VAL_3]]#0 -> %[[VAL_16:.*]], byref @add_reduction_byref_i32 %[[VAL_5]]#0 -> %[[VAL_17:.*]], byref @add_reduction_byref_i32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
-! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFmultiple_int_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_int_reductions_same_typeEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
@@ -319,12 +324,13 @@ subroutine multiple_int_reductions_same_type
 ! CHECK:           %[[VAL_10:.*]] = arith.constant 0.000000e+00 : f32
 ! CHECK:           hlfir.assign %[[VAL_10]] to %[[VAL_7]]#0 : f32, !fir.ref<f32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_11:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFmultiple_real_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_15:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_11:.*]] : !fir.ref<i32>) reduction(byref @add_reduction_byref_f32 %[[VAL_3]]#0 -> %[[VAL_16:.*]], byref @add_reduction_byref_f32 %[[VAL_5]]#0 -> %[[VAL_17:.*]], byref @add_reduction_byref_f32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<f32>, !fir.ref<f32>, !fir.ref<f32>) {
+! CHECK:             omp.wsloop reduction(byref @add_reduction_byref_f32 %[[VAL_3]]#0 -> %[[VAL_16:.*]], byref @add_reduction_byref_f32 %[[VAL_5]]#0 -> %[[VAL_17:.*]], byref @add_reduction_byref_f32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<f32>, !fir.ref<f32>, !fir.ref<f32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
-! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFmultiple_real_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
 ! CHECK:                 %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
 ! CHECK:                 %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_real_reductions_same_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
@@ -388,12 +394,13 @@ subroutine multiple_real_reductions_same_type
 ! CHECK:           %[[VAL_13:.*]] = arith.constant 0.000000e+00 : f64
 ! CHECK:           hlfir.assign %[[VAL_13]] to %[[VAL_3]]#0 : f64, !fir.ref<f64>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_14:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_14]] {uniq_name = "_QFmultiple_reductions_different_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_16:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_17:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_18:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_14:.*]] : !fir.ref<i32>) reduction(byref @add_reduction_byref_i32 %[[VAL_5]]#0 -> %[[VAL_19:.*]], byref @add_reduction_byref_i64 %[[VAL_7]]#0 -> %[[VAL_20:.*]], byref @add_reduction_byref_f32 %[[VAL_9]]#0 -> %[[VAL_21:.*]], byref @add_reduction_byref_f64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref<i32>, !fir.ref<i64>, !fir.ref<f32>, !fir.ref<f64>) {
+! CHECK:             omp.wsloop reduction(byref @add_reduction_byref_i32 %[[VAL_5]]#0 -> %[[VAL_19:.*]], byref @add_reduction_byref_i64 %[[VAL_7]]#0 -> %[[VAL_20:.*]], byref @add_reduction_byref_f32 %[[VAL_9]]#0 -> %[[VAL_21:.*]], byref @add_reduction_byref_f64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref<i32>, !fir.ref<i64>, !fir.ref<f32>, !fir.ref<f64>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) {
-! CHECK:                 %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_14]] {uniq_name = "_QFmultiple_reductions_different_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
 ! CHECK:                 %[[VAL_26:.*]]:2 = hlfir.declare %[[VAL_21]] {uniq_name = "_QFmultiple_reductions_different_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-add.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-add.f90
index a355e968b4146..c5278e0ef8815 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-add.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-add.f90
@@ -53,12 +53,13 @@
 ! CHECK:           %[[VAL_4:.*]] = arith.constant 0 : i32
 ! CHECK:           hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : i32, !fir.ref<i32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_5:.*]] : !fir.ref<i32>) reduction(@add_reduction_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) {
+! CHECK:             omp.wsloop reduction(@add_reduction_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:                 %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
@@ -94,12 +95,13 @@ subroutine simple_int_reduction
 ! CHECK:           %[[VAL_4:.*]] = arith.constant 0.000000e+00 : f32
 ! CHECK:           hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : f32, !fir.ref<f32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_real_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_5:.*]] : !fir.ref<i32>) reduction(@add_reduction_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>) {
+! CHECK:             omp.wsloop reduction(@add_reduction_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:                 %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_real_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
 ! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<f32>
@@ -136,12 +138,13 @@ subroutine simple_real_reduction
 ! CHECK:           %[[VAL_4:.*]] = arith.constant 0 : i32
 ! CHECK:           hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : i32, !fir.ref<i32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_5:.*]] : !fir.ref<i32>) reduction(@add_reduction_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) {
+! CHECK:             omp.wsloop reduction(@add_reduction_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:                 %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
@@ -176,12 +179,13 @@ subroutine simple_int_reduction_switch_order
 ! CHECK:           %[[VAL_4:.*]] = arith.constant 0.000000e+00 : f32
 ! CHECK:           hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : f32, !fir.ref<f32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_real_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_5:.*]] : !fir.ref<i32>) reduction(@add_reduction_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>) {
+! CHECK:             omp.wsloop reduction(@add_reduction_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:                 %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_real_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
 ! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
@@ -225,12 +229,13 @@ subroutine simple_real_reduction_switch_order
 ! CHECK:           %[[VAL_10:.*]] = arith.constant 0 : i32
 ! CHECK:           hlfir.assign %[[VAL_10]] to %[[VAL_7]]#0 : i32, !fir.ref<i32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_11:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFmultiple_int_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_15:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_11:.*]] : !fir.ref<i32>) reduction(@add_reduction_i32 %[[VAL_3]]#0 -> %[[VAL_16:.*]], @add_reduction_i32 %[[VAL_5]]#0 -> %[[VAL_17:.*]], @add_reduction_i32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
+! CHECK:             omp.wsloop reduction(@add_reduction_i32 %[[VAL_3]]#0 -> %[[VAL_16:.*]], @add_reduction_i32 %[[VAL_5]]#0 -> %[[VAL_17:.*]], @add_reduction_i32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
-! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFmultiple_int_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_int_reductions_same_typeEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
@@ -287,12 +292,13 @@ subroutine multiple_int_reductions_same_type
 ! CHECK:           %[[VAL_10:.*]] = arith.constant 0.000000e+00 : f32
 ! CHECK:           hlfir.assign %[[VAL_10]] to %[[VAL_7]]#0 : f32, !fir.ref<f32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_11:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFmultiple_real_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_15:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_11:.*]] : !fir.ref<i32>) reduction(@add_reduction_f32 %[[VAL_3]]#0 -> %[[VAL_16:.*]], @add_reduction_f32 %[[VAL_5]]#0 -> %[[VAL_17:.*]], @add_reduction_f32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<f32>, !fir.ref<f32>, !fir.ref<f32>) {
+! CHECK:             omp.wsloop reduction(@add_reduction_f32 %[[VAL_3]]#0 -> %[[VAL_16:.*]], @add_reduction_f32 %[[VAL_5]]#0 -> %[[VAL_17:.*]], @add_reduction_f32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<f32>, !fir.ref<f32>, !fir.ref<f32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
-! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFmultiple_real_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
 ! CHECK:                 %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
 ! CHECK:                 %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_real_reductions_same_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
@@ -356,12 +362,13 @@ subroutine multiple_real_reductions_same_type
 ! CHECK:           %[[VAL_13:.*]] = arith.constant 0.000000e+00 : f64
 ! CHECK:           hlfir.assign %[[VAL_13]] to %[[VAL_3]]#0 : f64, !fir.ref<f64>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_14:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_14]] {uniq_name = "_QFmultiple_reductions_different_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_16:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_17:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_18:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_14:.*]] : !fir.ref<i32>) reduction(@add_reduction_i32 %[[VAL_5]]#0 -> %[[VAL_19:.*]], @add_reduction_i64 %[[VAL_7]]#0 -> %[[VAL_20:.*]], @add_reduction_f32 %[[VAL_9]]#0 -> %[[VAL_21:.*]], @add_reduction_f64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref<i32>, !fir.ref<i64>, !fir.ref<f32>, !fir.ref<f64>) {
+! CHECK:             omp.wsloop reduction(@add_reduction_i32 %[[VAL_5]]#0 -> %[[VAL_19:.*]], @add_reduction_i64 %[[VAL_7]]#0 -> %[[VAL_20:.*]], @add_reduction_f32 %[[VAL_9]]#0 -> %[[VAL_21:.*]], @add_reduction_f64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref<i32>, !fir.ref<i64>, !fir.ref<f32>, !fir.ref<f64>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) {
-! CHECK:                 %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_14]] {uniq_name = "_QFmultiple_reductions_different_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
 ! CHECK:                 %[[VAL_26:.*]]:2 = hlfir.declare %[[VAL_21]] {uniq_name = "_QFmultiple_reductions_different_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90
index f09130152fb28..ce45d09d77a22 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90
@@ -214,12 +214,13 @@ program reduce15
 ! CHECK:           }
 ! CHECK:           fir.store %[[VAL_54:.*]]#1 to %[[VAL_3]]#1 : !fir.ref<i32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_55:.*]] = fir.alloca i32 {bindc_name = "i", pinned, uniq_name = "_QFEi"}
+! CHECK:             %[[VAL_56:.*]]:2 = hlfir.declare %[[VAL_55]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_57:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_58:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_59:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_55:.*]] : !fir.ref<i32>) reduction(byref @max_byref_box_heap_Uxi32 %[[VAL_5]]#0 -> %[[VAL_60:.*]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
+! CHECK:             omp.wsloop reduction(byref @max_byref_box_heap_Uxi32 %[[VAL_5]]#0 -> %[[VAL_60:.*]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
 ! CHECK:               omp.loop_nest (%[[VAL_61:.*]]) : i32 = (%[[VAL_57]]) to (%[[VAL_58]]) inclusive step (%[[VAL_59]]) {
-! CHECK:                 %[[VAL_56:.*]]:2 = hlfir.declare %[[VAL_55]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_62:.*]]:2 = hlfir.declare %[[VAL_60]] {fortran_attrs = {{.*}}<allocatable>, uniq_name = "_QFEmaxes"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
 ! CHECK:                 fir.store %[[VAL_61]] to %[[VAL_56]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_63:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
@@ -255,12 +256,13 @@ program reduce15
 ! CHECK:             omp.terminator
 ! CHECK:           }
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_87:.*]] = fir.alloca i32 {bindc_name = "i", pinned, uniq_name = "_QFEi"}
+! CHECK:             %[[VAL_88:.*]]:2 = hlfir.declare %[[VAL_87]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_89:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_90:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_91:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_87:.*]] : !fir.ref<i32>) reduction(byref @min_byref_box_heap_Uxi32 %[[VAL_7]]#0 -> %[[VAL_92:.*]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
+! CHECK:             omp.wsloop reduction(byref @min_byref_box_heap_Uxi32 %[[VAL_7]]#0 -> %[[VAL_92:.*]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
 ! CHECK:               omp.loop_nest (%[[VAL_93:.*]]) : i32 = (%[[VAL_89]]) to (%[[VAL_90]]) inclusive step (%[[VAL_91]]) {
-! CHECK:                 %[[VAL_88:.*]]:2 = hlfir.declare %[[VAL_87]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_94:.*]]:2 = hlfir.declare %[[VAL_92]] {fortran_attrs = {{.*}}<allocatable>, uniq_name = "_QFEmins"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
 ! CHECK:                 fir.store %[[VAL_93]] to %[[VAL_88]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_95:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-allocatable.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-allocatable.f90
index 7e6d7fddff5a1..ba7aea0d96c5b 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-allocatable.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-allocatable.f90
@@ -77,12 +77,13 @@ program reduce
 ! CHECK:           %[[VAL_8:.*]] = arith.constant 0 : i32
 ! CHECK:           hlfir.assign %[[VAL_8]] to %[[VAL_5]]#0 realloc : i32, !fir.ref<!fir.box<!fir.heap<i32>>>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_9:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_9]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 0 : i32
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_9:.*]] : !fir.ref<i32>) reduction(byref @add_reduction_byref_box_heap_i32 %[[VAL_5]]#0 -> %[[VAL_14:.*]] : !fir.ref<!fir.box<!fir.heap<i32>>>) {
+! CHECK:             omp.wsloop reduction(byref @add_reduction_byref_box_heap_i32 %[[VAL_5]]#0 -> %[[VAL_14:.*]] : !fir.ref<!fir.box<!fir.heap<i32>>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_15:.*]]) : i32 = (%[[VAL_11]]) to (%[[VAL_12]]) inclusive step (%[[VAL_13]]) {
-! CHECK:                 %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_9]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_16:.*]]:2 = hlfir.declare %[[VAL_14]] {fortran_attrs = {{.*}}<allocatable>, uniq_name = "_QFEr"} : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> (!fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.heap<i32>>>)
 ! CHECK:                 fir.store %[[VAL_15]] to %[[VAL_10]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_17:.*]] = fir.load %[[VAL_10]]#0 : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90
index 0e2fc3a24ee1b..9785f77c0e091 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90
@@ -81,12 +81,13 @@ subroutine reduce(r)
 ! CHECK:           omp.parallel {
 ! CHECK:             %[[VAL_4:.*]] = fir.alloca !fir.box<!fir.array<?xf64>>
 ! CHECK:             fir.store %[[VAL_3]]#1 to %[[VAL_4]] : !fir.ref<!fir.box<!fir.array<?xf64>>>
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFFreduceEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 0 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_5:.*]] : !fir.ref<i32>) reduction(byref @add_reduction_byref_box_Uxf64 %[[VAL_4]] -> %[[VAL_10:.*]] : !fir.ref<!fir.box<!fir.array<?xf64>>>) {
+! CHECK:             omp.wsloop reduction(byref @add_reduction_byref_box_Uxf64 %[[VAL_4]] -> %[[VAL_10:.*]] : !fir.ref<!fir.box<!fir.array<?xf64>>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:                 %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFFreduceEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {fortran_attrs = {{.*}}, uniq_name = "_QFFreduceEr"} : (!fir.ref<!fir.box<!fir.array<?xf64>>>) -> (!fir.ref<!fir.box<!fir.array<?xf64>>>, !fir.ref<!fir.box<!fir.array<?xf64>>>)
 ! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array.f90
index 07debb9f6b9e0..ea5df5a836972 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-array.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-array.f90
@@ -76,12 +76,13 @@ program reduce
 ! CHECK:             %[[VAL_6:.*]] = fir.embox %[[VAL_5]]#0(%[[VAL_4]]) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>>
 ! CHECK:             %[[VAL_7:.*]] = fir.alloca !fir.box<!fir.array<2xi32>>
 ! CHECK:             fir.store %[[VAL_6]] to %[[VAL_7]] : !fir.ref<!fir.box<!fir.array<2xi32>>>
+! CHECK:             %[[VAL_8:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 0 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_8:.*]] : !fir.ref<i32>) reduction(byref @add_reduction_byref_box_2xi32 %[[VAL_7]] -> %[[VAL_13:.*]] : !fir.ref<!fir.box<!fir.array<2xi32>>>) {
+! CHECK:             omp.wsloop reduction(byref @add_reduction_byref_box_2xi32 %[[VAL_7]] -> %[[VAL_13:.*]] : !fir.ref<!fir.box<!fir.array<2xi32>>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_14:.*]]) : i32 = (%[[VAL_10]]) to (%[[VAL_11]]) inclusive step (%[[VAL_12]]) {
-! CHECK:                 %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_13]] {uniq_name = "_QFEr"} : (!fir.ref<!fir.box<!fir.array<2xi32>>>) -> (!fir.ref<!fir.box<!fir.array<2xi32>>>, !fir.ref<!fir.box<!fir.array<2xi32>>>)
 ! CHECK:                 fir.store %[[VAL_14]] to %[[VAL_9]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_16:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90
index a25bedb359f4e..9815cfa9c3150 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90
@@ -76,12 +76,13 @@ program reduce
 ! CHECK:             %[[VAL_6:.*]] = fir.embox %[[VAL_5]]#0(%[[VAL_4]]) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>>
 ! CHECK:             %[[VAL_7:.*]] = fir.alloca !fir.box<!fir.array<2xi32>>
 ! CHECK:             fir.store %[[VAL_6]] to %[[VAL_7]] : !fir.ref<!fir.box<!fir.array<2xi32>>>
+! CHECK:             %[[VAL_8:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 0 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_8:.*]] : !fir.ref<i32>) reduction(byref @add_reduction_byref_box_2xi32 %[[VAL_7]] -> %[[VAL_13:.*]] : !fir.ref<!fir.box<!fir.array<2xi32>>>) {
+! CHECK:             omp.wsloop reduction(byref @add_reduction_byref_box_2xi32 %[[VAL_7]] -> %[[VAL_13:.*]] : !fir.ref<!fir.box<!fir.array<2xi32>>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_14:.*]]) : i32 = (%[[VAL_10]]) to (%[[VAL_11]]) inclusive step (%[[VAL_12]]) {
-! CHECK:                 %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_13]] {uniq_name = "_QFEr"} : (!fir.ref<!fir.box<!fir.array<2xi32>>>) -> (!fir.ref<!fir.box<!fir.array<2xi32>>>, !fir.ref<!fir.box<!fir.array<2xi32>>>)
 ! CHECK:                 fir.store %[[VAL_14]] to %[[VAL_9]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_16:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>>
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-iand-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-iand-byref.f90
index 18dcc3d722886..829229807698a 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-iand-byref.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-iand-byref.f90
@@ -32,12 +32,13 @@
 ! CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i32
 ! CHECK:           hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : i32, !fir.ref<i32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_7:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_iandEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_7:.*]] : !fir.ref<i32>) reduction(byref @iand_byref_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) {
+! CHECK:             omp.wsloop reduction(byref @iand_byref_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
-! CHECK:                 %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_iandEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_iandEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-iand.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-iand.f90
index eaf07f93c7474..6c060f2e5292a 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-iand.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-iand.f90
@@ -24,12 +24,13 @@
 ! CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i32
 ! CHECK:           hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : i32, !fir.ref<i32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_7:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_iandEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_7:.*]] : !fir.ref<i32>) reduction(@iand_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) {
+! CHECK:             omp.wsloop reduction(@iand_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
-! CHECK:                 %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_iandEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_iandEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-ieor-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-ieor-byref.f90
index 6be6913f91a33..284ada404bd60 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-ieor-byref.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-ieor-byref.f90
@@ -28,9 +28,10 @@
 
 
 !CHECK: omp.parallel
-!CHECK: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[I_REF:.*]] : !fir.ref<i32>) reduction(byref @ieor_byref_i32 %[[X_DECL]]#0 -> %[[PRV:.+]] : !fir.ref<i32>)
-!CHECK-NEXT: omp.loop_nest
+!CHECK: %[[I_REF:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
 !CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[I_REF]] {uniq_name = "_QFreduction_ieorEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: omp.wsloop reduction(byref @ieor_byref_i32 %[[X_DECL]]#0 -> %[[PRV:.+]] : !fir.ref<i32>)
+!CHECK-NEXT: omp.loop_nest
 !CHECK: %[[PRV_DECL:.+]]:2 = hlfir.declare %[[PRV]] {{.*}} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: fir.store %{{.*}} to %[[I_DECL]]#1 : !fir.ref<i32>
 !CHECK: %[[I_32:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-ieor.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-ieor.f90
index 632dbcf1348ec..e67253a413ce2 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-ieor.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-ieor.f90
@@ -17,9 +17,10 @@
 
 
 !CHECK: omp.parallel
-!CHECK: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[I_REF:.*]] : !fir.ref<i32>) reduction(@[[IEOR_DECLARE_I]] %[[X_DECL]]#0 -> %[[PRV:.+]] : !fir.ref<i32>)
-!CHECK-NEXT: omp.loop_nest
+!CHECK: %[[I_REF:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
 !CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[I_REF]] {uniq_name = "_QFreduction_ieorEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: omp.wsloop reduction(@[[IEOR_DECLARE_I]] %[[X_DECL]]#0 -> %[[PRV:.+]] : !fir.ref<i32>)
+!CHECK-NEXT: omp.loop_nest
 !CHECK: %[[PRV_DECL:.+]]:2 = hlfir.declare %[[PRV]] {{.*}} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: fir.store %{{.*}} to %[[I_DECL]]#1 : !fir.ref<i32>
 !CHECK: %[[I_32:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-ior-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-ior-byref.f90
index 90b9d2f61f930..315121cc7beb7 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-ior-byref.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-ior-byref.f90
@@ -30,12 +30,13 @@
 ! CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i32
 ! CHECK:           hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : i32, !fir.ref<i32>
 ! CHECK:           omp.parallel
+! CHECK:             %[[VAL_7:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_iorEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_7:.*]] : !fir.ref<i32>) reduction(byref @ior_byref_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) {
+! CHECK:             omp.wsloop reduction(byref @ior_byref_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
-! CHECK:                 %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_iorEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_iorEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-ior.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-ior.f90
index 144bc17cf8b31..3da250da9703d 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-ior.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-ior.f90
@@ -24,12 +24,13 @@
 ! CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i32
 ! CHECK:           hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : i32, !fir.ref<i32>
 ! CHECK:           omp.parallel
+! CHECK:             %[[VAL_7:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_iorEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_7:.*]] : !fir.ref<i32>) reduction(@ior_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) {
+! CHECK:             omp.wsloop reduction(@ior_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
-! CHECK:                 %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_iorEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_iorEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-and-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-and-byref.f90
index e73540a93a71b..30908b6bdd4ce 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-and-byref.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-and-byref.f90
@@ -39,12 +39,13 @@
 ! CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
 ! CHECK:           hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_10:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) reduction(byref @and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK:             omp.wsloop reduction(byref @and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:                 %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
@@ -87,12 +88,13 @@ end subroutine simple_reduction
 ! CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
 ! CHECK:           hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_10:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) reduction(byref @and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK:             omp.wsloop reduction(byref @and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:                 %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
@@ -144,12 +146,13 @@ subroutine simple_reduction_switch_order(y)
 ! CHECK:           %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i1) -> !fir.logical<4>
 ! CHECK:           hlfir.assign %[[VAL_17]] to %[[VAL_11]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_18:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_20:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_21:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_22:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_18:.*]] : !fir.ref<i32>) reduction(byref @and_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]], byref @and_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]], byref @and_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>) {
+! CHECK:             omp.wsloop reduction(byref @and_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]], byref @and_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]], byref @and_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
-! CHECK:                 %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-and.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-and.f90
index c059dab5bff5a..367683de02080 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-and.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-and.f90
@@ -31,12 +31,13 @@
 ! CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
 ! CHECK:           hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_10:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK:             omp.wsloop reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:                 %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
@@ -79,12 +80,13 @@ end subroutine simple_reduction
 ! CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
 ! CHECK:           hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_10:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK:             omp.wsloop reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:                 %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
@@ -136,12 +138,13 @@ subroutine simple_reduction_switch_order(y)
 ! CHECK:           %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i1) -> !fir.logical<4>
 ! CHECK:           hlfir.assign %[[VAL_17]] to %[[VAL_11]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_18:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_20:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_21:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_22:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_18:.*]] : !fir.ref<i32>) reduction(@and_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]], @and_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]], @and_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>) {
+! CHECK:             omp.wsloop reduction(@and_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]], @and_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]], @and_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
-! CHECK:                 %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv-byref.f90
index 5e24ad6f7bb63..9137dd8ff4454 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv-byref.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv-byref.f90
@@ -39,12 +39,13 @@
 ! CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
 ! CHECK:           hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_10:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) reduction(byref @eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK:             omp.wsloop reduction(byref @eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:                 %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
@@ -86,12 +87,13 @@ subroutine simple_reduction(y)
 ! CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
 ! CHECK:           hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_10:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) reduction(byref @eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK:             omp.wsloop reduction(byref @eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:                 %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
@@ -143,12 +145,13 @@ subroutine simple_reduction_switch_order(y)
 ! CHECK:           %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i1) -> !fir.logical<4>
 ! CHECK:           hlfir.assign %[[VAL_17]] to %[[VAL_11]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_18:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_20:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_21:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_22:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_18:.*]] : !fir.ref<i32>) reduction(byref @eqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]], byref @eqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]], byref @eqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>) {
+! CHECK:             omp.wsloop reduction(byref @eqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]], byref @eqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]], byref @eqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
-! CHECK:                 %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv.f90
index ad9e869984eac..d1ef676c37407 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv.f90
@@ -31,12 +31,13 @@
 ! CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
 ! CHECK:           hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_10:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK:             omp.wsloop reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:                 %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
@@ -78,12 +79,13 @@ subroutine simple_reduction(y)
 ! CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
 ! CHECK:           hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_10:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK:             omp.wsloop reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:                 %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
@@ -135,12 +137,13 @@ subroutine simple_reduction_switch_order(y)
 ! CHECK:           %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i1) -> !fir.logical<4>
 ! CHECK:           hlfir.assign %[[VAL_17]] to %[[VAL_11]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_18:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_20:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_21:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_22:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_18:.*]] : !fir.ref<i32>) reduction(@eqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]], @eqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]], @eqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>) {
+! CHECK:             omp.wsloop reduction(@eqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]], @eqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]], @eqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
-! CHECK:                 %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv-byref.f90
index b5bf1d0d0b589..d1491a0f5561d 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv-byref.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv-byref.f90
@@ -39,12 +39,13 @@
 ! CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
 ! CHECK:           hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_10:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) reduction(byref @neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK:             omp.wsloop reduction(byref @neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:                 %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
@@ -87,12 +88,13 @@ subroutine simple_reduction(y)
 ! CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
 ! CHECK:           hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_10:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) reduction(byref @neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK:             omp.wsloop reduction(byref @neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:                 %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
@@ -146,12 +148,13 @@ subroutine simple_reduction_switch_order(y)
 ! CHECK:           %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i1) -> !fir.logical<4>
 ! CHECK:           hlfir.assign %[[VAL_17]] to %[[VAL_11]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_18:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_20:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_21:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_22:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_18:.*]] : !fir.ref<i32>) reduction(byref @neqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]], byref @neqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]], byref @neqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>) {
+! CHECK:             omp.wsloop reduction(byref @neqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]], byref @neqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]], byref @neqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
-! CHECK:                 %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv.f90
index ac9fc7f051d88..b4df699c49ffa 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv.f90
@@ -31,12 +31,13 @@
 ! CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
 ! CHECK:           hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_10:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK:             omp.wsloop reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:                 %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
@@ -79,12 +80,13 @@ subroutine simple_reduction(y)
 ! CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
 ! CHECK:           hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_10:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK:             omp.wsloop reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:                 %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
@@ -138,12 +140,13 @@ subroutine simple_reduction_switch_order(y)
 ! CHECK:           %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i1) -> !fir.logical<4>
 ! CHECK:           hlfir.assign %[[VAL_17]] to %[[VAL_11]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_18:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_20:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_21:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_22:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_18:.*]] : !fir.ref<i32>) reduction(@neqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]], @neqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]], @neqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>) {
+! CHECK:             omp.wsloop reduction(@neqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]], @neqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]], @neqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
-! CHECK:                 %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-or-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-or-byref.f90
index 883064884b637..8f4a6c22c1d74 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-or-byref.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-or-byref.f90
@@ -38,12 +38,13 @@
 ! CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
 ! CHECK:           hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_10:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) reduction(byref @or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK:             omp.wsloop reduction(byref @or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:                 %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
@@ -85,12 +86,13 @@ subroutine simple_reduction(y)
 ! CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
 ! CHECK:           hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_10:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) reduction(byref @or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK:             omp.wsloop reduction(byref @or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:                 %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
@@ -142,12 +144,13 @@ subroutine simple_reduction_switch_order(y)
 ! CHECK:           %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i1) -> !fir.logical<4>
 ! CHECK:           hlfir.assign %[[VAL_17]] to %[[VAL_11]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_18:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_20:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_21:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_22:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_18:.*]] : !fir.ref<i32>) reduction(byref @or_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]], byref @or_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]], byref @or_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>) {
+! CHECK:             omp.wsloop reduction(byref @or_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]], byref @or_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]], byref @or_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
-! CHECK:                 %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-or.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-or.f90
index 312c08d17a14d..9d367797ec216 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-or.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-or.f90
@@ -31,12 +31,13 @@
 ! CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
 ! CHECK:           hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_10:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK:             omp.wsloop reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:                 %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
@@ -78,12 +79,13 @@ subroutine simple_reduction(y)
 ! CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
 ! CHECK:           hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_10:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK:             omp.wsloop reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:                 %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
@@ -135,12 +137,13 @@ subroutine simple_reduction_switch_order(y)
 ! CHECK:           %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i1) -> !fir.logical<4>
 ! CHECK:           hlfir.assign %[[VAL_17]] to %[[VAL_11]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_18:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_20:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_21:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_22:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_18:.*]] : !fir.ref<i32>) reduction(@or_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]], @or_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]], @or_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>) {
+! CHECK:             omp.wsloop reduction(@or_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]], @or_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]], @or_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
-! CHECK:                 %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK:                 %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max-2-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max-2-byref.f90
index 0438e19f34391..28c70899e6ccc 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-max-2-byref.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-max-2-byref.f90
@@ -1,7 +1,7 @@
 ! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction -o - %s 2>&1 | FileCheck %s
 ! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction -o - %s 2>&1 | FileCheck %s
 
-! CHECK: omp.wsloop private({{.*}}) reduction(byref @max_byref_i32
+! CHECK: omp.wsloop reduction(byref @max_byref_i32
 ! CHECK: arith.cmpi sgt
 ! CHECK: arith.select
 
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max-2.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max-2.f90
index 66c75bbe38f10..abd7ca1ae555d 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-max-2.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-max-2.f90
@@ -1,7 +1,7 @@
 ! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
 ! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
 
-! CHECK: omp.wsloop private({{.*}}) reduction(@max_i32
+! CHECK: omp.wsloop reduction(@max_i32
 ! CHECK: arith.cmpi sgt
 ! CHECK: arith.select
 
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max-byref.f90
index 07c18f90480bf..9abff8ccfa3b6 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-max-byref.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-max-byref.f90
@@ -45,12 +45,13 @@
 ! CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i32
 ! CHECK:           hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : i32, !fir.ref<i32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_7:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_max_intEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_7:.*]] : !fir.ref<i32>) reduction(byref @max_byref_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) {
+! CHECK:             omp.wsloop reduction(byref @max_byref_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
-! CHECK:                 %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_max_intEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
@@ -74,12 +75,13 @@
 ! CHECK:           %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f32
 ! CHECK:           hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : f32, !fir.ref<f32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_7:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_max_realEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_7:.*]] : !fir.ref<i32>) reduction(byref @max_byref_f32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<f32>) {
+! CHECK:             omp.wsloop reduction(byref @max_byref_f32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<f32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
-! CHECK:                 %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_max_realEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
 ! CHECK:                 fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
@@ -93,12 +95,13 @@
 ! CHECK:                 omp.yield
 ! CHECK:             omp.terminator
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_30:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_31:.*]]:2 = hlfir.declare %[[VAL_30]] {uniq_name = "_QFreduction_max_realEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_32:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_33:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_34:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_30:.*]] : !fir.ref<i32>) reduction(byref @max_byref_f32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref<f32>) {
+! CHECK:             omp.wsloop reduction(byref @max_byref_f32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref<f32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) {
-! CHECK:                 %[[VAL_31:.*]]:2 = hlfir.declare %[[VAL_30]] {uniq_name = "_QFreduction_max_realEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_37:.*]]:2 = hlfir.declare %[[VAL_35]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
 ! CHECK:                 fir.store %[[VAL_36]] to %[[VAL_31]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_38:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max.f90
index 7bdfa0948c747..7237d3f903b74 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-max.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-max.f90
@@ -35,12 +35,13 @@
 ! CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i32
 ! CHECK:           hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : i32, !fir.ref<i32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_7:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_max_intEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_7:.*]] : !fir.ref<i32>) reduction(@max_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) {
+! CHECK:             omp.wsloop reduction(@max_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
-! CHECK:                 %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_max_intEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
@@ -64,12 +65,13 @@
 ! CHECK:           %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f32
 ! CHECK:           hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : f32, !fir.ref<f32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_7:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_max_realEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_7:.*]] : !fir.ref<i32>) reduction(@max_f32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<f32>) {
+! CHECK:             omp.wsloop reduction(@max_f32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<f32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
-! CHECK:                 %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_max_realEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
 ! CHECK:                 fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
@@ -83,12 +85,13 @@
 ! CHECK:                 omp.yield
 ! CHECK:             omp.terminator
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_30:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_31:.*]]:2 = hlfir.declare %[[VAL_30]] {uniq_name = "_QFreduction_max_realEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_32:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_33:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_34:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_30:.*]] : !fir.ref<i32>) reduction(@max_f32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref<f32>) {
+! CHECK:             omp.wsloop reduction(@max_f32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref<f32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) {
-! CHECK:                 %[[VAL_31:.*]]:2 = hlfir.declare %[[VAL_30]] {uniq_name = "_QFreduction_max_realEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_37:.*]]:2 = hlfir.declare %[[VAL_35]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
 ! CHECK:                 fir.store %[[VAL_36]] to %[[VAL_31]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_38:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-min-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-min-byref.f90
index 88a455f4b45ac..a4bfbaa09d2fa 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-min-byref.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-min-byref.f90
@@ -45,12 +45,13 @@
 ! CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i32
 ! CHECK:           hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : i32, !fir.ref<i32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_7:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_min_intEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_7:.*]] : !fir.ref<i32>) reduction(byref @min_byref_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) {
+! CHECK:             omp.wsloop reduction(byref @min_byref_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
-! CHECK:                 %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_min_intEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_min_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
@@ -74,12 +75,13 @@
 ! CHECK:           %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f32
 ! CHECK:           hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : f32, !fir.ref<f32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_7:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_min_realEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_7:.*]] : !fir.ref<i32>) reduction(byref @min_byref_f32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<f32>) {
+! CHECK:             omp.wsloop reduction(byref @min_byref_f32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<f32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
-! CHECK:                 %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_min_realEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
 ! CHECK:                 fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
@@ -95,12 +97,13 @@
 ! CHECK:             omp.terminator
 ! CHECK:           }
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_30:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_31:.*]]:2 = hlfir.declare %[[VAL_30]] {uniq_name = "_QFreduction_min_realEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_32:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_33:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_34:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_30:.*]] : !fir.ref<i32>) reduction(byref @min_byref_f32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref<f32>) {
+! CHECK:             omp.wsloop reduction(byref @min_byref_f32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref<f32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) {
-! CHECK:                 %[[VAL_31:.*]]:2 = hlfir.declare %[[VAL_30]] {uniq_name = "_QFreduction_min_realEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_37:.*]]:2 = hlfir.declare %[[VAL_35]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
 ! CHECK:                 fir.store %[[VAL_36]] to %[[VAL_31]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_38:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-min.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-min.f90
index 6d4dcf1ab68eb..ce9e53a17523c 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-min.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-min.f90
@@ -35,12 +35,13 @@
 ! CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i32
 ! CHECK:           hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : i32, !fir.ref<i32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_7:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_min_intEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_7:.*]] : !fir.ref<i32>) reduction(@min_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) {
+! CHECK:             omp.wsloop reduction(@min_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
-! CHECK:                 %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_min_intEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_min_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
@@ -64,12 +65,13 @@
 ! CHECK:           %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f32
 ! CHECK:           hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : f32, !fir.ref<f32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_7:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_min_realEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_7:.*]] : !fir.ref<i32>) reduction(@min_f32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<f32>) {
+! CHECK:             omp.wsloop reduction(@min_f32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<f32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
-! CHECK:                 %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_min_realEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
 ! CHECK:                 fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
@@ -85,12 +87,13 @@
 ! CHECK:             omp.terminator
 ! CHECK:           }
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_30:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_31:.*]]:2 = hlfir.declare %[[VAL_30]] {uniq_name = "_QFreduction_min_realEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_32:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_33:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_34:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_30:.*]] : !fir.ref<i32>) reduction(@min_f32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref<f32>) {
+! CHECK:             omp.wsloop reduction(@min_f32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref<f32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) {
-! CHECK:                 %[[VAL_31:.*]]:2 = hlfir.declare %[[VAL_30]] {uniq_name = "_QFreduction_min_realEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_37:.*]]:2 = hlfir.declare %[[VAL_35]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
 ! CHECK:                 fir.store %[[VAL_36]] to %[[VAL_31]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_38:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-min2.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-min2.f90
index db8e59cb09dfa..d83ebb77af3eb 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-min2.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-min2.f90
@@ -34,12 +34,13 @@ program reduce
 ! CHECK:           %[[VAL_2:.*]] = fir.address_of(@_QFEr) : !fir.ref<i32>
 ! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFEr"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_6:.*]] = arith.constant 0 : i32
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_4:.*]] : !fir.ref<i32>) reduction(@min_i32 %[[VAL_3]]#0 -> %[[VAL_9:.*]] : !fir.ref<i32>) {
+! CHECK:             omp.wsloop reduction(@min_i32 %[[VAL_3]]#0 -> %[[VAL_9:.*]] : !fir.ref<i32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
-! CHECK:                 %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_9]] {uniq_name = "_QFEr"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 fir.store %[[VAL_10]] to %[[VAL_5]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_12:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-mul-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-mul-byref.f90
index 85df29e83f75d..18554fbb72aee 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-mul-byref.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-mul-byref.f90
@@ -88,12 +88,13 @@
 ! CHECK:           %[[VAL_4:.*]] = arith.constant 1 : i32
 ! CHECK:           hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : i32, !fir.ref<i32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_5:.*]] : !fir.ref<i32>) reduction(byref @multiply_reduction_byref_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) {
+! CHECK:             omp.wsloop reduction(byref @multiply_reduction_byref_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:                 %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
@@ -124,12 +125,13 @@ subroutine simple_int_reduction
 ! CHECK:           %[[VAL_4:.*]] = arith.constant 1.000000e+00 : f32
 ! CHECK:           hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : f32, !fir.ref<f32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_real_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_5:.*]] : !fir.ref<i32>) reduction(byref @multiply_reduction_byref_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>) {
+! CHECK:             omp.wsloop reduction(byref @multiply_reduction_byref_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:                 %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_real_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
 ! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<f32>
@@ -161,12 +163,13 @@ subroutine simple_real_reduction
 ! CHECK:           %[[VAL_4:.*]] = arith.constant 1 : i32
 ! CHECK:           hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : i32, !fir.ref<i32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_5:.*]] : !fir.ref<i32>) reduction(byref @multiply_reduction_byref_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) {
+! CHECK:             omp.wsloop reduction(byref @multiply_reduction_byref_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:                 %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
@@ -197,12 +200,13 @@ subroutine simple_int_reduction_switch_order
 ! CHECK:           %[[VAL_4:.*]] = arith.constant 1.000000e+00 : f32
 ! CHECK:           hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : f32, !fir.ref<f32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_real_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_5:.*]] : !fir.ref<i32>) reduction(byref @multiply_reduction_byref_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>) {
+! CHECK:             omp.wsloop reduction(byref @multiply_reduction_byref_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:                 %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_real_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
 ! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
@@ -242,12 +246,13 @@ subroutine simple_real_reduction_switch_order
 ! CHECK:           %[[VAL_10:.*]] = arith.constant 1 : i32
 ! CHECK:           hlfir.assign %[[VAL_10]] to %[[VAL_7]]#0 : i32, !fir.ref<i32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_11:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFmultiple_int_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_15:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_11:.*]] : !fir.ref<i32>) reduction(byref @multiply_reduction_byref_i32 %[[VAL_3]]#0 -> %[[VAL_16:.*]], byref @multiply_reduction_byref_i32 %[[VAL_5]]#0 -> %[[VAL_17:.*]], byref @multiply_reduction_byref_i32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
+! CHECK:             omp.wsloop reduction(byref @multiply_reduction_byref_i32 %[[VAL_3]]#0 -> %[[VAL_16:.*]], byref @multiply_reduction_byref_i32 %[[VAL_5]]#0 -> %[[VAL_17:.*]], byref @multiply_reduction_byref_i32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
-! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFmultiple_int_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_int_reductions_same_typeEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
@@ -300,12 +305,13 @@ subroutine multiple_int_reductions_same_type
 ! CHECK:           %[[VAL_10:.*]] = arith.constant 1.000000e+00 : f32
 ! CHECK:           hlfir.assign %[[VAL_10]] to %[[VAL_7]]#0 : f32, !fir.ref<f32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_11:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFmultiple_real_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_15:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_11:.*]] : !fir.ref<i32>) reduction(byref @multiply_reduction_byref_f32 %[[VAL_3]]#0 -> %[[VAL_16:.*]], byref @multiply_reduction_byref_f32 %[[VAL_5]]#0 -> %[[VAL_17:.*]], byref @multiply_reduction_byref_f32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<f32>, !fir.ref<f32>, !fir.ref<f32>) {
+! CHECK:             omp.wsloop reduction(byref @multiply_reduction_byref_f32 %[[VAL_3]]#0 -> %[[VAL_16:.*]], byref @multiply_reduction_byref_f32 %[[VAL_5]]#0 -> %[[VAL_17:.*]], byref @multiply_reduction_byref_f32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<f32>, !fir.ref<f32>, !fir.ref<f32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
-! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFmultiple_real_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
 ! CHECK:                 %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
 ! CHECK:                 %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_real_reductions_same_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
@@ -365,12 +371,13 @@ subroutine multiple_real_reductions_same_type
 ! CHECK:           %[[VAL_13:.*]] = arith.constant 1.000000e+00 : f64
 ! CHECK:           hlfir.assign %[[VAL_13]] to %[[VAL_3]]#0 : f64, !fir.ref<f64>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_14:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_14]] {uniq_name = "_QFmultiple_reductions_different_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_16:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_17:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_18:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_14:.*]] : !fir.ref<i32>) reduction(byref @multiply_reduction_byref_i32 %[[VAL_5]]#0 -> %[[VAL_19:.*]], byref @multiply_reduction_byref_i64 %[[VAL_7]]#0 -> %[[VAL_20:.*]], byref @multiply_reduction_byref_f32 %[[VAL_9]]#0 -> %[[VAL_21:.*]], byref @multiply_reduction_byref_f64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref<i32>, !fir.ref<i64>, !fir.ref<f32>, !fir.ref<f64>) {
+! CHECK:             omp.wsloop reduction(byref @multiply_reduction_byref_i32 %[[VAL_5]]#0 -> %[[VAL_19:.*]], byref @multiply_reduction_byref_i64 %[[VAL_7]]#0 -> %[[VAL_20:.*]], byref @multiply_reduction_byref_f32 %[[VAL_9]]#0 -> %[[VAL_21:.*]], byref @multiply_reduction_byref_f64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref<i32>, !fir.ref<i64>, !fir.ref<f32>, !fir.ref<f64>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) {
-! CHECK:                 %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_14]] {uniq_name = "_QFmultiple_reductions_different_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
 ! CHECK:                 %[[VAL_26:.*]]:2 = hlfir.declare %[[VAL_21]] {uniq_name = "_QFmultiple_reductions_different_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90
index 09c44f187f4a2..f5c12ccf61f76 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90
@@ -55,12 +55,13 @@
 ! CHECK:           %[[VAL_4:.*]] = arith.constant 1 : i32
 ! CHECK:           hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : i32, !fir.ref<i32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_5:.*]] : !fir.ref<i32>) reduction(@multiply_reduction_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) {
+! CHECK:             omp.wsloop reduction(@multiply_reduction_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:                 %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
@@ -91,12 +92,13 @@ subroutine simple_int_reduction
 ! CHECK:           %[[VAL_4:.*]] = arith.constant 1.000000e+00 : f32
 ! CHECK:           hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : f32, !fir.ref<f32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_real_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_5:.*]] : !fir.ref<i32>) reduction(@multiply_reduction_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>) {
+! CHECK:             omp.wsloop reduction(@multiply_reduction_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:                 %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_real_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
 ! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<f32>
@@ -128,12 +130,13 @@ subroutine simple_real_reduction
 ! CHECK:           %[[VAL_4:.*]] = arith.constant 1 : i32
 ! CHECK:           hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : i32, !fir.ref<i32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_5:.*]] : !fir.ref<i32>) reduction(@multiply_reduction_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) {
+! CHECK:             omp.wsloop reduction(@multiply_reduction_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:                 %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
@@ -164,12 +167,13 @@ subroutine simple_int_reduction_switch_order
 ! CHECK:           %[[VAL_4:.*]] = arith.constant 1.000000e+00 : f32
 ! CHECK:           hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : f32, !fir.ref<f32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_real_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_5:.*]] : !fir.ref<i32>) reduction(@multiply_reduction_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>) {
+! CHECK:             omp.wsloop reduction(@multiply_reduction_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:                 %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_real_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
 ! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
@@ -209,12 +213,13 @@ subroutine simple_real_reduction_switch_order
 ! CHECK:           %[[VAL_10:.*]] = arith.constant 1 : i32
 ! CHECK:           hlfir.assign %[[VAL_10]] to %[[VAL_7]]#0 : i32, !fir.ref<i32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_11:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFmultiple_int_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_15:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_11:.*]] : !fir.ref<i32>) reduction(@multiply_reduction_i32 %[[VAL_3]]#0 -> %[[VAL_16:.*]], @multiply_reduction_i32 %[[VAL_5]]#0 -> %[[VAL_17:.*]], @multiply_reduction_i32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
+! CHECK:             omp.wsloop reduction(@multiply_reduction_i32 %[[VAL_3]]#0 -> %[[VAL_16:.*]], @multiply_reduction_i32 %[[VAL_5]]#0 -> %[[VAL_17:.*]], @multiply_reduction_i32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
-! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFmultiple_int_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_int_reductions_same_typeEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
@@ -267,12 +272,13 @@ subroutine multiple_int_reductions_same_type
 ! CHECK:           %[[VAL_10:.*]] = arith.constant 1.000000e+00 : f32
 ! CHECK:           hlfir.assign %[[VAL_10]] to %[[VAL_7]]#0 : f32, !fir.ref<f32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_11:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFmultiple_real_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_15:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_11:.*]] : !fir.ref<i32>) reduction(@multiply_reduction_f32 %[[VAL_3]]#0 -> %[[VAL_16:.*]], @multiply_reduction_f32 %[[VAL_5]]#0 -> %[[VAL_17:.*]], @multiply_reduction_f32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<f32>, !fir.ref<f32>, !fir.ref<f32>) {
+! CHECK:             omp.wsloop reduction(@multiply_reduction_f32 %[[VAL_3]]#0 -> %[[VAL_16:.*]], @multiply_reduction_f32 %[[VAL_5]]#0 -> %[[VAL_17:.*]], @multiply_reduction_f32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<f32>, !fir.ref<f32>, !fir.ref<f32>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
-! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFmultiple_real_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
 ! CHECK:                 %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
 ! CHECK:                 %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_real_reductions_same_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
@@ -332,12 +338,13 @@ subroutine multiple_real_reductions_same_type
 ! CHECK:           %[[VAL_13:.*]] = arith.constant 1.000000e+00 : f64
 ! CHECK:           hlfir.assign %[[VAL_13]] to %[[VAL_3]]#0 : f64, !fir.ref<f64>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_14:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_14]] {uniq_name = "_QFmultiple_reductions_different_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_16:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_17:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_18:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_14:.*]] : !fir.ref<i32>) reduction(@multiply_reduction_i32 %[[VAL_5]]#0 -> %[[VAL_19:.*]], @multiply_reduction_i64 %[[VAL_7]]#0 -> %[[VAL_20:.*]], @multiply_reduction_f32 %[[VAL_9]]#0 -> %[[VAL_21:.*]], @multiply_reduction_f64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref<i32>, !fir.ref<i64>, !fir.ref<f32>, !fir.ref<f64>) {
+! CHECK:             omp.wsloop reduction(@multiply_reduction_i32 %[[VAL_5]]#0 -> %[[VAL_19:.*]], @multiply_reduction_i64 %[[VAL_7]]#0 -> %[[VAL_20:.*]], @multiply_reduction_f32 %[[VAL_9]]#0 -> %[[VAL_21:.*]], @multiply_reduction_f64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref<i32>, !fir.ref<i64>, !fir.ref<f32>, !fir.ref<f64>) {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) {
-! CHECK:                 %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_14]] {uniq_name = "_QFmultiple_reductions_different_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
 ! CHECK:                 %[[VAL_26:.*]]:2 = hlfir.declare %[[VAL_21]] {uniq_name = "_QFmultiple_reductions_different_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-multi.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-multi.f90
index 66229259adf82..659ba06005670 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-multi.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-multi.f90
@@ -41,7 +41,7 @@
 !CHECK:      %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y_REF]] {uniq_name = "_QFmultiple_reductionEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
 !CHECK:      %[[Z_REF:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_reductionEz"}
 !CHECK:      %[[Z_DECL:.*]]:2 = hlfir.declare %[[Z_REF]] {uniq_name = "_QFmultiple_reductionEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK:      omp.wsloop private({{.*}}) reduction(
+!CHECK:      omp.wsloop reduction(
 !CHECK-SAME: @[[ADD_RED_I32_NAME]] %[[X_DECL]]#0 -> %[[PRV_X:[^,]+]],
 !CHECK-SAME: @[[ADD_RED_F32_NAME]] %[[Y_DECL]]#0 -> %[[PRV_Y:[^,]+]],
 !CHECK-SAME: @[[MIN_RED_I32_NAME]] %[[Z_DECL]]#0 -> %[[PRV_Z:.+]] :
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-multiple-clauses.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-multiple-clauses.f90
index 75773416e4840..5b6ab095b45b6 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-multiple-clauses.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-multiple-clauses.f90
@@ -112,12 +112,13 @@ program main
 ! CHECK:             %[[VAL_11:.*]] = fir.embox %[[VAL_4]]#0(%[[VAL_3]]) : (!fir.ref<!fir.array<3x3xf64>>, !fir.shape<2>) -> !fir.box<!fir.array<3x3xf64>>
 ! CHECK:             %[[VAL_12:.*]] = fir.alloca !fir.box<!fir.array<3x3xf64>>
 ! CHECK:             fir.store %[[VAL_11]] to %[[VAL_12]] : !fir.ref<!fir.box<!fir.array<3x3xf64>>>
+! CHECK:             %[[VAL_13:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK:             %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_13]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_15:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_16:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_17:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_13:.*]] : !fir.ref<i32>) reduction(@add_reduction_f64 %[[VAL_8]]#0 -> %[[VAL_18:.*]], byref @add_reduction_byref_box_3x3xf64 %[[VAL_12]] -> %[[VAL_19:.*]] : !fir.ref<f64>, !fir.ref<!fir.box<!fir.array<3x3xf64>>>) {
+! CHECK:             omp.wsloop reduction(@add_reduction_f64 %[[VAL_8]]#0 -> %[[VAL_18:.*]], byref @add_reduction_byref_box_3x3xf64 %[[VAL_12]] -> %[[VAL_19:.*]] : !fir.ref<f64>, !fir.ref<!fir.box<!fir.array<3x3xf64>>>) {
 ! CHECK:               omp.loop_nest (%[[VAL_20:.*]]) : i32 = (%[[VAL_15]]) to (%[[VAL_16]]) inclusive step (%[[VAL_17]]) {
-! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_13]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFEscalar"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
 ! CHECK:                 %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFEarray"} : (!fir.ref<!fir.box<!fir.array<3x3xf64>>>) -> (!fir.ref<!fir.box<!fir.array<3x3xf64>>>, !fir.ref<!fir.box<!fir.array<3x3xf64>>>)
 ! CHECK:                 fir.store %[[VAL_20]] to %[[VAL_14]]#1 : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-pointer.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-pointer.f90
index f706e48b8fda8..2c126bb8962c2 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-pointer.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-pointer.f90
@@ -87,12 +87,13 @@ program reduce_pointer
 ! CHECK:           %[[VAL_17:.*]] = fir.box_addr %[[VAL_16]] : (!fir.box<!fir.ptr<i32>>) -> !fir.ptr<i32>
 ! CHECK:           hlfir.assign %[[VAL_15]] to %[[VAL_17]] : i32, !fir.ptr<i32>
 ! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_18:.*]] = fir.alloca i32 {bindc_name = "i", pinned, uniq_name = "_QFEi"}
+! CHECK:             %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[VAL_20:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_21:.*]] = arith.constant 5 : i32
 ! CHECK:             %[[VAL_22:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_18:.*]] : !fir.ref<i32>) reduction(byref @add_reduction_byref_box_ptr_i32 %[[VAL_5]]#0 -> %[[VAL_23:.*]] : !fir.ref<!fir.box<!fir.ptr<i32>>>) {
+! CHECK:             omp.wsloop reduction(byref @add_reduction_byref_box_ptr_i32 %[[VAL_5]]#0 -> %[[VAL_23:.*]] : !fir.ref<!fir.box<!fir.ptr<i32>>>) {
 ! CHECK:               omp.loop_nest (%[[VAL_24:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
-! CHECK:                 %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_23]] {fortran_attrs = {{.*}}<pointer>, uniq_name = "_QFEv"} : (!fir.ref<!fir.box<!fir.ptr<i32>>>) -> (!fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.ref<!fir.box<!fir.ptr<i32>>>)
 ! CHECK:                 fir.store %[[VAL_24]] to %[[VAL_19]]#1 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_26:.*]] = fir.load %[[VAL_25]]#0 : !fir.ref<!fir.box<!fir.ptr<i32>>>
diff --git a/flang/test/Lower/OpenMP/wsloop-schedule.f90 b/flang/test/Lower/OpenMP/wsloop-schedule.f90
index 0ff4ce7c3ede3..ae854a2de0c9d 100644
--- a/flang/test/Lower/OpenMP/wsloop-schedule.f90
+++ b/flang/test/Lower/OpenMP/wsloop-schedule.f90
@@ -14,7 +14,7 @@ program wsloop_dynamic
 !CHECK:      %[[WS_LB:.*]] = arith.constant 1 : i32
 !CHECK:      %[[WS_UB:.*]] = arith.constant 9 : i32
 !CHECK:      %[[WS_STEP:.*]] = arith.constant 1 : i32
-!CHECK:      omp.wsloop nowait schedule(runtime, simd) private({{.*}}) {
+!CHECK:      omp.wsloop nowait schedule(runtime, simd) {
 !CHECK-NEXT:   omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
 !CHECK:          fir.store %[[I]] to %[[STORE:.*]]#1 : !fir.ref<i32>
 
diff --git a/flang/test/Lower/OpenMP/wsloop-unstructured.f90 b/flang/test/Lower/OpenMP/wsloop-unstructured.f90
index 6174718c08758..8c89f863ab877 100644
--- a/flang/test/Lower/OpenMP/wsloop-unstructured.f90
+++ b/flang/test/Lower/OpenMP/wsloop-unstructured.f90
@@ -29,7 +29,7 @@ end subroutine sub
 ! CHECK-SAME:                      %[[VAL_2:.*]]: !fir.ref<!fir.array<?x?xf32>> {fir.bindc_name = "x"},
 ! CHECK-SAME:                      %[[VAL_3:.*]]: !fir.ref<!fir.array<?x?xf32>> {fir.bindc_name = "y"}) {
 ! [...]
-! CHECK:             omp.wsloop private({{.*}}) {
+! CHECK:             omp.wsloop {
 ! CHECK-NEXT:          omp.loop_nest (%[[VAL_53:.*]], %[[VAL_54:.*]]) : i32 = ({{.*}}) to ({{.*}}) inclusive step ({{.*}}) {
 ! [...]
 ! CHECK:                 cf.br ^bb1
diff --git a/flang/test/Lower/OpenMP/wsloop-variable.f90 b/flang/test/Lower/OpenMP/wsloop-variable.f90
index 50b2b3a21ff1e..cc77ce754d97e 100644
--- a/flang/test/Lower/OpenMP/wsloop-variable.f90
+++ b/flang/test/Lower/OpenMP/wsloop-variable.f90
@@ -22,7 +22,7 @@ program wsloop_variable
 !CHECK:      %[[TMP5:.*]] = fir.convert %{{.*}} : (i128) -> i64
 !CHECK:      %[[TMP6:.*]] = fir.convert %[[TMP1]] : (i32) -> i64
 !CHECK:      %[[TMP7:.*]] = fir.convert %{{.*}} : (i32) -> i64
-!CHECK:      omp.wsloop private({{.*}}) {
+!CHECK:      omp.wsloop {
 !CHECK-NEXT:   omp.loop_nest (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) {
 !CHECK:          %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i64) -> i16
 !CHECK:          fir.store %[[ARG0_I16]] to %[[STORE_IV0:.*]]#1 : !fir.ref<i16>
@@ -48,7 +48,7 @@ program wsloop_variable
 !CHECK:      %[[TMP12:.*]] = arith.constant 1 : i32
 !CHECK:      %[[TMP13:.*]] = fir.convert %{{.*}} : (i8) -> i32
 !CHECK:      %[[TMP14:.*]] = fir.convert %{{.*}} : (i64) -> i32
-!CHECK:      omp.wsloop private({{.*}}) {
+!CHECK:      omp.wsloop {
 !CHECK-NEXT:   omp.loop_nest (%[[ARG0:.*]]) : i32 = (%[[TMP12]]) to (%[[TMP13]]) inclusive step (%[[TMP14]]) {
 !CHECK:          %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i32) -> i16
 !CHECK:          fir.store %[[ARG0_I16]] to %[[STORE3:.*]]#1 : !fir.ref<i16>
@@ -68,7 +68,7 @@ program wsloop_variable
 !CHECK:      %[[TMP17:.*]] = fir.convert %{{.*}} : (i8) -> i64
 !CHECK:      %[[TMP18:.*]] = fir.convert %{{.*}} : (i16) -> i64
 !CHECK:      %[[TMP19:.*]] = fir.convert %{{.*}} : (i32) -> i64
-!CHECK:      omp.wsloop private({{.*}}) {
+!CHECK:      omp.wsloop {
 !CHECK-NEXT:   omp.loop_nest (%[[ARG1:.*]]) : i64 = (%[[TMP17]]) to (%[[TMP18]]) inclusive step (%[[TMP19]])  {
 !CHECK:          %[[ARG1_I128:.*]] = fir.convert %[[ARG1]] : (i64) -> i128
 !CHECK:          fir.store %[[ARG1_I128]] to %[[STORE4:.*]]#1 : !fir.ref<i128>
@@ -123,14 +123,16 @@ subroutine wsloop_variable_sub
   integer(kind=16) :: i16_lb
   real :: x
 
+!CHECK:           %[[VAL_2:.*]] = fir.alloca i16 {bindc_name = "i2", pinned, {{.*}}}
+!CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFwsloop_variable_subEi2"} : (!fir.ref<i16>) -> (!fir.ref<i16>, !fir.ref<i16>)
+
 !CHECK:           %[[VAL_22:.*]] = arith.constant 1 : i32
 !CHECK:           %[[VAL_23:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref<i8>
 !CHECK:           %[[VAL_24:.*]] = fir.load %[[VAL_13]]#0 : !fir.ref<i16>
 !CHECK:           %[[VAL_25:.*]] = fir.convert %[[VAL_23]] : (i8) -> i32
 !CHECK:           %[[VAL_26:.*]] = fir.convert %[[VAL_24]] : (i16) -> i32
-!CHECK:           omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_2:.*]] : !fir.ref<i16>) {
+!CHECK:           omp.wsloop {
 !CHECK-NEXT:        omp.loop_nest (%[[VAL_27:.*]]) : i32 = (%[[VAL_22]]) to (%[[VAL_25]]) inclusive step (%[[VAL_26]]) {
-!CHECK:               %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFwsloop_variable_subEi2"} : (!fir.ref<i16>) -> (!fir.ref<i16>, !fir.ref<i16>)
 !CHECK:               %[[VAL_28:.*]] = fir.convert %[[VAL_27]] : (i32) -> i16
 !CHECK:               fir.store %[[VAL_28]] to %[[VAL_3]]#1 : !fir.ref<i16>
 !CHECK:               %[[VAL_29:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i128>
@@ -170,13 +172,14 @@ subroutine wsloop_variable_sub
 !CHECK:           %[[VAL_49:.*]] = arith.constant 5 : i8
 !CHECK:           hlfir.assign %[[VAL_49]] to %[[VAL_19]]#0 : i8, !fir.ref<i8>
 
+!CHECK:           %[[VAL_0:.*]] = fir.alloca i8 {bindc_name = "i1", pinned, {{.*}}}
+!CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFwsloop_variable_subEi1"} : (!fir.ref<i8>) -> (!fir.ref<i8>, !fir.ref<i8>)
 
 !CHECK:           %[[VAL_50:.*]] = arith.constant 1 : i32
 !CHECK:           %[[VAL_51:.*]] = arith.constant 10 : i32
 !CHECK:           %[[VAL_52:.*]] = arith.constant 1 : i32
-!CHECK:           omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_0:.*]] : !fir.ref<i8>) {
+!CHECK:           omp.wsloop {
 !CHECK-NEXT:        omp.loop_nest (%[[VAL_53:.*]]) : i32 = (%[[VAL_50]]) to (%[[VAL_51]]) inclusive step (%[[VAL_52]]) {
-!CHECK:               %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFwsloop_variable_subEi1"} : (!fir.ref<i8>) -> (!fir.ref<i8>, !fir.ref<i8>)
 !CHECK:               %[[VAL_54:.*]] = fir.convert %[[VAL_53]] : (i32) -> i8
 !CHECK:               fir.store %[[VAL_54]] to %[[VAL_1]]#1 : !fir.ref<i8>
 !CHECK:               %[[VAL_55:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<i8>
diff --git a/flang/test/Lower/OpenMP/wsloop.f90 b/flang/test/Lower/OpenMP/wsloop.f90
index 44b2f585b3a67..4378233a622ed 100644
--- a/flang/test/Lower/OpenMP/wsloop.f90
+++ b/flang/test/Lower/OpenMP/wsloop.f90
@@ -7,14 +7,15 @@ subroutine simple_loop
   integer :: i
   ! CHECK:  omp.parallel
   !$OMP PARALLEL
+  ! CHECK:      %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned, {{.*}}}
+  ! CHECK:      %[[IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
   ! CHECK:      %[[WS_LB:.*]] = arith.constant 1 : i32
   ! CHECK:      %[[WS_UB:.*]] = arith.constant 9 : i32
   ! CHECK:      %[[WS_STEP:.*]] = arith.constant 1 : i32
-  ! CHECK:      omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[ALLOCA_IV:.*]] : !fir.ref<i32>) {
+  ! CHECK:      omp.wsloop {
   ! CHECK-NEXT:   omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP DO
   do i=1, 9
-  ! CHECK:          %[[IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  ! CHECK:          fir.store %[[I]] to %[[IV_DECL:.*]]#1 : !fir.ref<i32>
+  ! CHECK:          fir.store %[[I]] to %[[IV_DECL]]#1 : !fir.ref<i32>
   ! CHECK:          %[[LOAD_IV:.*]] = fir.load %[[IV_DECL]]#0 : !fir.ref<i32>
   ! CHECK:          fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
@@ -31,12 +32,13 @@ subroutine simple_loop_with_step
   integer :: i
   ! CHECK:  omp.parallel
   !$OMP PARALLEL
+  ! CHECK:      %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned, {{.*}}}
+  ! CHECK:      %[[IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loop_with_stepEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
   ! CHECK:      %[[WS_LB:.*]] = arith.constant 1 : i32
   ! CHECK:      %[[WS_UB:.*]] = arith.constant 9 : i32
   ! CHECK:      %[[WS_STEP:.*]] = arith.constant 2 : i32
-  ! CHECK:      omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[ALLOCA_IV:.*]] : !fir.ref<i32>) {
+  ! CHECK:      omp.wsloop {
   ! CHECK-NEXT:   omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
-  ! CHECK:          %[[IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loop_with_stepEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
   ! CHECK:          fir.store %[[I]] to %[[IV_DECL]]#1 : !fir.ref<i32>
   ! CHECK:          %[[LOAD_IV:.*]] = fir.load %[[IV_DECL]]#0 : !fir.ref<i32>
   !$OMP DO
@@ -55,14 +57,15 @@ subroutine loop_with_schedule_nowait
   integer :: i
   ! CHECK:  omp.parallel
   !$OMP PARALLEL
+  ! CHECK:      %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned, {{.*}}}
+  ! CHECK:      %[[IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "_QFloop_with_schedule_nowaitEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
   ! CHECK:      %[[WS_LB:.*]] = arith.constant 1 : i32
   ! CHECK:      %[[WS_UB:.*]] = arith.constant 9 : i32
   ! CHECK:      %[[WS_STEP:.*]] = arith.constant 1 : i32
-  ! CHECK:      omp.wsloop nowait schedule(runtime) private(@{{.*}} %{{.*}}#0 -> %[[ALLOCA_IV:.*]] : !fir.ref<i32>) {
+  ! CHECK:      omp.wsloop nowait schedule(runtime) {
   ! CHECK-NEXT:   omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP DO SCHEDULE(runtime)
   do i=1, 9
-  ! CHECK:          %[[IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "_QFloop_with_schedule_nowaitEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
   ! CHECK:          fir.store %[[I]] to %[[IV_DECL]]#1 : !fir.ref<i32>
   ! CHECK:          %[[LOAD_IV:.*]] = fir.load %[[IV_DECL]]#0 : !fir.ref<i32>
   ! CHECK:          fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
diff --git a/flang/test/Semantics/OpenMP/declare-target08.f90 b/flang/test/Semantics/OpenMP/declare-target08.f90
new file mode 100644
index 0000000000000..1438d79d37348
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/declare-target08.f90
@@ -0,0 +1,41 @@
+! RUN: %flang_fc1 -fopenmp -fdebug-dump-symbols %s | FileCheck %s
+
+subroutine bar(i, a)
+    !$omp declare target
+    real :: a
+    integer :: i
+    a = a - i
+end subroutine
+
+function baz(a)
+    !$omp declare target
+    real, intent(in) :: a
+    baz = a
+end function baz
+
+program main
+real a
+!CHECK: bar (Subroutine, OmpDeclareTarget): HostAssoc
+!CHECK: baz (Function, OmpDeclareTarget): HostAssoc
+!$omp declare target(bar)
+!$omp declare target(baz)
+
+a = baz(a)
+call bar(2,a)
+call foo(a)
+return
+end
+
+subroutine foo(a)
+real a
+integer i
+!CHECK: bar (Subroutine, OmpDeclareTarget): HostAssoc
+!CHECK: baz (Function, OmpDeclareTarget): HostAssoc
+!$omp declare target(bar)
+!$omp declare target(baz)
+!$omp target
+    a = baz(a)
+    call bar(i,a)
+!$omp end target
+return
+end
diff --git a/flang/test/Transforms/generic-loop-rewriting-todo.mlir b/flang/test/Transforms/generic-loop-rewriting-todo.mlir
index 9ea6bf001b668..becd6b8dcb5cb 100644
--- a/flang/test/Transforms/generic-loop-rewriting-todo.mlir
+++ b/flang/test/Transforms/generic-loop-rewriting-todo.mlir
@@ -1,18 +1,5 @@
 // RUN: fir-opt --omp-generic-loop-conversion -verify-diagnostics %s
 
-func.func @_QPtarget_loop() {
-  %c0 = arith.constant 0 : i32
-  %c10 = arith.constant 10 : i32
-  %c1 = arith.constant 1 : i32
-  // expected-error@below {{not yet implemented: Standalone `omp loop` directive}}
-  omp.loop {
-    omp.loop_nest (%arg3) : i32 = (%c0) to (%c10) inclusive step (%c1) {
-      omp.yield
-    }
-  }
-  return
-}
-
 func.func @_QPtarget_parallel_loop() {
   omp.target {
     omp.parallel {
diff --git a/libc/cmake/modules/LLVMLibCTestRules.cmake b/libc/cmake/modules/LLVMLibCTestRules.cmake
index 4dbe5e046cc68..96fa6c3a707e4 100644
--- a/libc/cmake/modules/LLVMLibCTestRules.cmake
+++ b/libc/cmake/modules/LLVMLibCTestRules.cmake
@@ -35,7 +35,7 @@ function(_get_common_test_compile_options output_var c_test flags)
     # list(APPEND compile_options "-Wno-sign-conversion")
     # list(APPEND compile_options "-Wimplicit-fallthrough")
     # list(APPEND compile_options "-Wwrite-strings")
-    # list(APPEND compile_options "-Wextra-semi")
+    list(APPEND compile_options "-Wextra-semi")
     # Silence this warning because _Complex is a part of C99.
     if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
       if(NOT c_test)
diff --git a/libc/test/src/math/smoke/nan_test.cpp b/libc/test/src/math/smoke/nan_test.cpp
index 46b9e9aa9563a..da6beb94c7f05 100644
--- a/libc/test/src/math/smoke/nan_test.cpp
+++ b/libc/test/src/math/smoke/nan_test.cpp
@@ -23,7 +23,7 @@ class LlvmLibcNanTest : public LIBC_NAMESPACE::testing::FEnvSafeTest {
     auto actual_fp = LIBC_NAMESPACE::fputil::FPBits<double>(result);
     auto expected_fp = LIBC_NAMESPACE::fputil::FPBits<double>(bits);
     EXPECT_EQ(actual_fp.uintval(), expected_fp.uintval());
-  };
+  }
 };
 
 TEST_F(LlvmLibcNanTest, NCharSeq) {
diff --git a/libc/test/src/math/smoke/nanf128_test.cpp b/libc/test/src/math/smoke/nanf128_test.cpp
index 25dd2ef1d5b1c..dd1986f17b978 100644
--- a/libc/test/src/math/smoke/nanf128_test.cpp
+++ b/libc/test/src/math/smoke/nanf128_test.cpp
@@ -28,7 +28,7 @@ class LlvmLibcNanf128Test : public LIBC_NAMESPACE::testing::FEnvSafeTest {
     auto actual_fp = FPBits128(result);
     auto expected_fp = FPBits128(bits);
     EXPECT_EQ(actual_fp.uintval(), expected_fp.uintval());
-  };
+  }
 };
 
 TEST_F(LlvmLibcNanf128Test, NCharSeq) {
diff --git a/libc/test/src/math/smoke/nanf16_test.cpp b/libc/test/src/math/smoke/nanf16_test.cpp
index ec640a3b9eef9..5fafb1a36e4cd 100644
--- a/libc/test/src/math/smoke/nanf16_test.cpp
+++ b/libc/test/src/math/smoke/nanf16_test.cpp
@@ -23,7 +23,7 @@ class LlvmLibcNanf16Test : public LIBC_NAMESPACE::testing::FEnvSafeTest {
     auto actual_fp = LIBC_NAMESPACE::fputil::FPBits<float16>(result);
     auto expected_fp = LIBC_NAMESPACE::fputil::FPBits<float16>(bits);
     EXPECT_EQ(actual_fp.uintval(), expected_fp.uintval());
-  };
+  }
 };
 
 TEST_F(LlvmLibcNanf16Test, NCharSeq) {
diff --git a/libc/test/src/math/smoke/nanf_test.cpp b/libc/test/src/math/smoke/nanf_test.cpp
index dd3124ee9c511..19d94b40b5ffb 100644
--- a/libc/test/src/math/smoke/nanf_test.cpp
+++ b/libc/test/src/math/smoke/nanf_test.cpp
@@ -23,7 +23,7 @@ class LlvmLibcNanfTest : public LIBC_NAMESPACE::testing::FEnvSafeTest {
     auto actual_fp = LIBC_NAMESPACE::fputil::FPBits<float>(result);
     auto expected_fp = LIBC_NAMESPACE::fputil::FPBits<float>(bits);
     EXPECT_EQ(actual_fp.uintval(), expected_fp.uintval());
-  };
+  }
 };
 
 TEST_F(LlvmLibcNanfTest, NCharSeq) {
diff --git a/libc/test/src/math/smoke/nanl_test.cpp b/libc/test/src/math/smoke/nanl_test.cpp
index ef3f9c15dafd9..c7217928e943b 100644
--- a/libc/test/src/math/smoke/nanl_test.cpp
+++ b/libc/test/src/math/smoke/nanl_test.cpp
@@ -33,7 +33,7 @@ class LlvmLibcNanlTest : public LIBC_NAMESPACE::testing::FEnvSafeTest {
     auto actual_fp = LIBC_NAMESPACE::fputil::FPBits<long double>(result);
     auto expected_fp = LIBC_NAMESPACE::fputil::FPBits<long double>(bits);
     EXPECT_EQ(actual_fp.uintval(), expected_fp.uintval());
-  };
+  }
 };
 
 TEST_F(LlvmLibcNanlTest, NCharSeq) {
diff --git a/libclc/clc/include/clc/clcmacro.h b/libclc/clc/include/clc/clcmacro.h
index 44928b2e428bf..676560e9efcb4 100644
--- a/libclc/clc/include/clc/clcmacro.h
+++ b/libclc/clc/include/clc/clcmacro.h
@@ -227,6 +227,8 @@
   }                                                                            \
   _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, FUNCTION, half, half)
 
+#pragma OPENCL EXTENSION cl_khr_fp16 : disable
+
 #else
 
 #define _CLC_DEFINE_UNARY_BUILTIN_FP16(FUNCTION)
diff --git a/libclc/clc/include/clc/relational/clc_select.h b/libclc/clc/include/clc/relational/clc_select.h
index ddea7c528ec74..a92f2051b577d 100644
--- a/libclc/clc/include/clc/relational/clc_select.h
+++ b/libclc/clc/include/clc/relational/clc_select.h
@@ -1,23 +1,19 @@
 #ifndef __CLC_RELATIONAL_CLC_SELECT_H__
 #define __CLC_RELATIONAL_CLC_SELECT_H__
 
-#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
-// clspv and spir-v targets provide their own OpenCL-compatible select
-#define __clc_select select
-#else
-
 /* Duplciate these so we don't have to distribute utils.h */
 #define __CLC_CONCAT(x, y) x##y
 #define __CLC_XCONCAT(x, y) __CLC_CONCAT(x, y)
 
-#define __CLC_BODY <clc/relational/clc_select.inc>
+#define __CLC_SELECT_FN __clc_select
+
+#define __CLC_BODY <clc/relational/clc_select_decl.inc>
 #include <clc/math/gentype.inc>
-#define __CLC_BODY <clc/relational/clc_select.inc>
+#define __CLC_BODY <clc/relational/clc_select_decl.inc>
 #include <clc/integer/gentype.inc>
 
+#undef __CLC_SELECT_FN
 #undef __CLC_CONCAT
 #undef __CLC_XCONCAT
 
-#endif
-
 #endif // __CLC_RELATIONAL_CLC_SELECT_H__
diff --git a/libclc/clc/include/clc/relational/clc_select.inc b/libclc/clc/include/clc/relational/clc_select_decl.inc
similarity index 58%
rename from libclc/clc/include/clc/relational/clc_select.inc
rename to libclc/clc/include/clc/relational/clc_select_decl.inc
index abf0e0fa43600..3a4f2dcb75170 100644
--- a/libclc/clc/include/clc/relational/clc_select.inc
+++ b/libclc/clc/include/clc/relational/clc_select_decl.inc
@@ -13,12 +13,12 @@
 #define __CLC_U_GENTYPE __CLC_XCONCAT(ushort, __CLC_VECSIZE)
 #endif
 
-_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_select(__CLC_GENTYPE x,
-                                                   __CLC_GENTYPE y,
-                                                   __CLC_S_GENTYPE z);
-_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_select(__CLC_GENTYPE x,
-                                                   __CLC_GENTYPE y,
-                                                   __CLC_U_GENTYPE z);
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_SELECT_FN(__CLC_GENTYPE x,
+                                                      __CLC_GENTYPE y,
+                                                      __CLC_S_GENTYPE z);
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_SELECT_FN(__CLC_GENTYPE x,
+                                                      __CLC_GENTYPE y,
+                                                      __CLC_U_GENTYPE z);
 
 #ifdef __CLC_FPSIZE
 #undef __CLC_S_GENTYPE
diff --git a/libclc/clc/lib/generic/relational/clc_select.inc b/libclc/clc/include/clc/relational/clc_select_impl.inc
similarity index 55%
rename from libclc/clc/lib/generic/relational/clc_select.inc
rename to libclc/clc/include/clc/relational/clc_select_impl.inc
index 47db80672a02c..ad53e822179fb 100644
--- a/libclc/clc/lib/generic/relational/clc_select.inc
+++ b/libclc/clc/include/clc/relational/clc_select_impl.inc
@@ -13,16 +13,16 @@
 #define __CLC_U_GENTYPE __CLC_XCONCAT(ushort, __CLC_VECSIZE)
 #endif
 
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_select(__CLC_GENTYPE x,
-                                                  __CLC_GENTYPE y,
-                                                  __CLC_S_GENTYPE z) {
-  return z ? y : x;
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_SELECT_FN(__CLC_GENTYPE x,
+                                                     __CLC_GENTYPE y,
+                                                     __CLC_S_GENTYPE z) {
+  __CLC_SELECT_DEF(x, y, z);
 }
 
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_select(__CLC_GENTYPE x,
-                                                  __CLC_GENTYPE y,
-                                                  __CLC_U_GENTYPE z) {
-  return z ? y : x;
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_SELECT_FN(__CLC_GENTYPE x,
+                                                     __CLC_GENTYPE y,
+                                                     __CLC_U_GENTYPE z) {
+  __CLC_SELECT_DEF(x, y, z);
 }
 
 #ifdef __CLC_FPSIZE
diff --git a/libclc/clc/lib/clspv/SOURCES b/libclc/clc/lib/clspv/SOURCES
index 209dc0ca61e2b..c3fc03c0b3dd5 100644
--- a/libclc/clc/lib/clspv/SOURCES
+++ b/libclc/clc/lib/clspv/SOURCES
@@ -4,4 +4,5 @@
 ../generic/math/clc_mad.cl
 ../generic/math/clc_rint.cl
 ../generic/math/clc_trunc.cl
+../generic/relational/clc_select.cl
 ../generic/shared/clc_clamp.cl
diff --git a/libclc/clc/lib/generic/relational/clc_bitselect.cl b/libclc/clc/lib/generic/relational/clc_bitselect.cl
index 66b28af71b38d..6281eeea1abb2 100644
--- a/libclc/clc/lib/generic/relational/clc_bitselect.cl
+++ b/libclc/clc/lib/generic/relational/clc_bitselect.cl
@@ -53,3 +53,15 @@ FLOAT_BITSELECT(double, ulong, 8)
 FLOAT_BITSELECT(double, ulong, 16)
 
 #endif
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+FLOAT_BITSELECT(half, ushort, )
+FLOAT_BITSELECT(half, ushort, 2)
+FLOAT_BITSELECT(half, ushort, 3)
+FLOAT_BITSELECT(half, ushort, 4)
+FLOAT_BITSELECT(half, ushort, 8)
+FLOAT_BITSELECT(half, ushort, 16)
+
+#endif
diff --git a/libclc/clc/lib/generic/relational/clc_select.cl b/libclc/clc/lib/generic/relational/clc_select.cl
index bb016ed2993e7..210db7867eefb 100644
--- a/libclc/clc/lib/generic/relational/clc_select.cl
+++ b/libclc/clc/lib/generic/relational/clc_select.cl
@@ -1,7 +1,10 @@
 #include <clc/internal/clc.h>
 #include <clc/utils.h>
 
-#define __CLC_BODY <clc_select.inc>
+#define __CLC_SELECT_FN __clc_select
+#define __CLC_SELECT_DEF(x, y, z) return z ? y : x
+
+#define __CLC_BODY <clc/relational/clc_select_impl.inc>
 #include <clc/math/gentype.inc>
-#define __CLC_BODY <clc_select.inc>
+#define __CLC_BODY <clc/relational/clc_select_impl.inc>
 #include <clc/integer/gentype.inc>
diff --git a/libclc/clc/lib/spirv/SOURCES b/libclc/clc/lib/spirv/SOURCES
index 905afa03d8a56..55d109478faac 100644
--- a/libclc/clc/lib/spirv/SOURCES
+++ b/libclc/clc/lib/spirv/SOURCES
@@ -8,4 +8,5 @@
 ../generic/math/clc_mad.cl
 ../generic/math/clc_rint.cl
 ../generic/math/clc_trunc.cl
+../generic/relational/clc_select.cl
 ../generic/shared/clc_clamp.cl
diff --git a/libclc/clc/lib/spirv64/SOURCES b/libclc/clc/lib/spirv64/SOURCES
index 905afa03d8a56..55d109478faac 100644
--- a/libclc/clc/lib/spirv64/SOURCES
+++ b/libclc/clc/lib/spirv64/SOURCES
@@ -8,4 +8,5 @@
 ../generic/math/clc_mad.cl
 ../generic/math/clc_rint.cl
 ../generic/math/clc_trunc.cl
+../generic/relational/clc_select.cl
 ../generic/shared/clc_clamp.cl
diff --git a/libclc/generic/include/clc/relational/select.h b/libclc/generic/include/clc/relational/select.h
index d20deae0d2cf9..847884a07b7f2 100644
--- a/libclc/generic/include/clc/relational/select.h
+++ b/libclc/generic/include/clc/relational/select.h
@@ -2,10 +2,13 @@
 #define __CLC_CONCAT(x, y) x ## y
 #define __CLC_XCONCAT(x, y) __CLC_CONCAT(x, y)
 
-#define __CLC_BODY <clc/relational/select.inc>
+#define __CLC_SELECT_FN select
+
+#define __CLC_BODY <clc/relational/clc_select_decl.inc>
 #include <clc/math/gentype.inc>
-#define __CLC_BODY <clc/relational/select.inc>
+#define __CLC_BODY <clc/relational/clc_select_decl.inc>
 #include <clc/integer/gentype.inc>
 
+#undef __CLC_SELECT_FN
 #undef __CLC_CONCAT
 #undef __CLC_XCONCAT
diff --git a/libclc/generic/include/clc/relational/select.inc b/libclc/generic/include/clc/relational/select.inc
deleted file mode 100644
index 11a957a56e4b7..0000000000000
--- a/libclc/generic/include/clc/relational/select.inc
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifdef __CLC_SCALAR
-#define __CLC_VECSIZE
-#endif
-
-#if __CLC_FPSIZE == 64
-#define __CLC_S_GENTYPE __CLC_XCONCAT(long, __CLC_VECSIZE)
-#define __CLC_U_GENTYPE __CLC_XCONCAT(ulong, __CLC_VECSIZE)
-#elif __CLC_FPSIZE == 32
-#define __CLC_S_GENTYPE __CLC_XCONCAT(int, __CLC_VECSIZE)
-#define __CLC_U_GENTYPE __CLC_XCONCAT(uint, __CLC_VECSIZE)
-#elif __CLC_FPSIZE == 16
-#define __CLC_S_GENTYPE __CLC_XCONCAT(short, __CLC_VECSIZE)
-#define __CLC_U_GENTYPE __CLC_XCONCAT(ushort, __CLC_VECSIZE)
-#endif
-
-_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE select(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_S_GENTYPE z);
-_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE select(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_U_GENTYPE z);
-
-#ifdef __CLC_FPSIZE
-#undef __CLC_S_GENTYPE
-#undef __CLC_U_GENTYPE
-#endif
-#ifdef __CLC_SCALAR
-#undef __CLC_VECSIZE
-#endif
diff --git a/libclc/generic/lib/math/acos.cl b/libclc/generic/lib/math/acos.cl
index aeb72872da342..d71d10024b180 100644
--- a/libclc/generic/lib/math/acos.cl
+++ b/libclc/generic/lib/math/acos.cl
@@ -172,4 +172,10 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, acos, double);
 
 #endif // cl_khr_fp64
 
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 _CLC_DEFINE_UNARY_BUILTIN_FP16(acos)
+
+#endif
diff --git a/libclc/generic/lib/math/acosh.cl b/libclc/generic/lib/math/acosh.cl
index 4656f14457d42..977c2e929b34c 100644
--- a/libclc/generic/lib/math/acosh.cl
+++ b/libclc/generic/lib/math/acosh.cl
@@ -126,4 +126,10 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, acosh, double)
 
 #endif
 
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 _CLC_DEFINE_UNARY_BUILTIN_FP16(acosh)
+
+#endif
diff --git a/libclc/generic/lib/math/acospi.cl b/libclc/generic/lib/math/acospi.cl
index 83a47eb27e836..5aa8a083df4e9 100644
--- a/libclc/generic/lib/math/acospi.cl
+++ b/libclc/generic/lib/math/acospi.cl
@@ -171,4 +171,10 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, acospi, double)
 
 #endif
 
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 _CLC_DEFINE_UNARY_BUILTIN_FP16(acospi)
+
+#endif
diff --git a/libclc/generic/lib/math/asinh.cl b/libclc/generic/lib/math/asinh.cl
index f7637ade227ee..686d9f7a95e5d 100644
--- a/libclc/generic/lib/math/asinh.cl
+++ b/libclc/generic/lib/math/asinh.cl
@@ -292,4 +292,10 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, asinh, double)
 
 #endif
 
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 _CLC_DEFINE_UNARY_BUILTIN_FP16(asinh)
+
+#endif
diff --git a/libclc/generic/lib/math/atan.cl b/libclc/generic/lib/math/atan.cl
index 28eaaf7b2ae72..b6b067f2bf4ff 100644
--- a/libclc/generic/lib/math/atan.cl
+++ b/libclc/generic/lib/math/atan.cl
@@ -182,5 +182,10 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atan, double);
 
 #endif // cl_khr_fp64
 
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
 
 _CLC_DEFINE_UNARY_BUILTIN_FP16(atan)
+
+#endif
diff --git a/libclc/generic/lib/math/atan2.cl b/libclc/generic/lib/math/atan2.cl
index 98b457a69a0e2..32419937a856b 100644
--- a/libclc/generic/lib/math/atan2.cl
+++ b/libclc/generic/lib/math/atan2.cl
@@ -236,4 +236,10 @@ _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atan2, double, double);
 
 #endif
 
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 _CLC_DEFINE_BINARY_BUILTIN_FP16(atan2)
+
+#endif
diff --git a/libclc/generic/lib/math/atan2pi.cl b/libclc/generic/lib/math/atan2pi.cl
index ad41b11bd3741..e631918f7539e 100644
--- a/libclc/generic/lib/math/atan2pi.cl
+++ b/libclc/generic/lib/math/atan2pi.cl
@@ -220,4 +220,10 @@ _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atan2pi, double, double)
 
 #endif
 
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 _CLC_DEFINE_BINARY_BUILTIN_FP16(atan2pi)
+
+#endif
diff --git a/libclc/generic/lib/math/atanh.cl b/libclc/generic/lib/math/atanh.cl
index f2298a2624e84..10bad190cc0dc 100644
--- a/libclc/generic/lib/math/atanh.cl
+++ b/libclc/generic/lib/math/atanh.cl
@@ -112,4 +112,10 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atanh, double)
 
 #endif
 
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 _CLC_DEFINE_UNARY_BUILTIN_FP16(atanh)
+
+#endif
diff --git a/libclc/generic/lib/math/atanpi.cl b/libclc/generic/lib/math/atanpi.cl
index 9e6b3ece7f325..8522acf349933 100644
--- a/libclc/generic/lib/math/atanpi.cl
+++ b/libclc/generic/lib/math/atanpi.cl
@@ -181,4 +181,10 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atanpi, double)
 
 #endif
 
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 _CLC_DEFINE_UNARY_BUILTIN_FP16(atanpi)
+
+#endif
diff --git a/libclc/generic/lib/math/cbrt.cl b/libclc/generic/lib/math/cbrt.cl
index 8462f5f6e14bb..49f2ba8a411cb 100644
--- a/libclc/generic/lib/math/cbrt.cl
+++ b/libclc/generic/lib/math/cbrt.cl
@@ -150,4 +150,10 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cbrt, double)
 
 #endif
 
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 _CLC_DEFINE_UNARY_BUILTIN_FP16(cbrt)
+
+#endif
diff --git a/libclc/generic/lib/math/cos.cl b/libclc/generic/lib/math/cos.cl
index 42192895151a3..2945c90d6fe0f 100644
--- a/libclc/generic/lib/math/cos.cl
+++ b/libclc/generic/lib/math/cos.cl
@@ -76,4 +76,10 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cos, double);
 
 #endif
 
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 _CLC_DEFINE_UNARY_BUILTIN_FP16(cos)
+
+#endif
diff --git a/libclc/generic/lib/math/cosh.cl b/libclc/generic/lib/math/cosh.cl
index 1f58d7acd2b55..02b03a761aa19 100644
--- a/libclc/generic/lib/math/cosh.cl
+++ b/libclc/generic/lib/math/cosh.cl
@@ -191,4 +191,10 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cosh, double)
 
 #endif
 
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 _CLC_DEFINE_UNARY_BUILTIN_FP16(cosh)
+
+#endif
diff --git a/libclc/generic/lib/math/cospi.cl b/libclc/generic/lib/math/cospi.cl
index 0e69f7885faa1..98e989267c917 100644
--- a/libclc/generic/lib/math/cospi.cl
+++ b/libclc/generic/lib/math/cospi.cl
@@ -135,4 +135,10 @@ _CLC_OVERLOAD _CLC_DEF double cospi(double x) {
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cospi, double);
 #endif
 
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 _CLC_DEFINE_UNARY_BUILTIN_FP16(cospi)
+
+#endif
diff --git a/libclc/generic/lib/math/exp.cl b/libclc/generic/lib/math/exp.cl
index 1e37d76f3ac8e..95dc0db44df76 100644
--- a/libclc/generic/lib/math/exp.cl
+++ b/libclc/generic/lib/math/exp.cl
@@ -89,4 +89,10 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, exp, double)
 
 #endif
 
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 _CLC_DEFINE_UNARY_BUILTIN_FP16(exp)
+
+#endif
diff --git a/libclc/generic/lib/math/expm1.cl b/libclc/generic/lib/math/expm1.cl
index fbb9f0d087c89..d50a88e0aaae8 100644
--- a/libclc/generic/lib/math/expm1.cl
+++ b/libclc/generic/lib/math/expm1.cl
@@ -141,4 +141,10 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, expm1, double)
 
 #endif
 
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 _CLC_DEFINE_UNARY_BUILTIN_FP16(expm1)
+
+#endif
diff --git a/libclc/generic/lib/math/lgamma.cl b/libclc/generic/lib/math/lgamma.cl
index ca7b9610a31b3..f0476230e63fe 100644
--- a/libclc/generic/lib/math/lgamma.cl
+++ b/libclc/generic/lib/math/lgamma.cl
@@ -43,4 +43,10 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, lgamma, double)
 
 #endif
 
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 _CLC_DEFINE_UNARY_BUILTIN_FP16(lgamma)
+
+#endif
diff --git a/libclc/generic/lib/math/log1p.cl b/libclc/generic/lib/math/log1p.cl
index a371995a08492..7fef79ca7b8f3 100644
--- a/libclc/generic/lib/math/log1p.cl
+++ b/libclc/generic/lib/math/log1p.cl
@@ -176,4 +176,10 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log1p, double);
 
 #endif // cl_khr_fp64
 
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 _CLC_DEFINE_UNARY_BUILTIN_FP16(log1p)
+
+#endif
diff --git a/libclc/generic/lib/math/logb.cl b/libclc/generic/lib/math/logb.cl
index 7a7111d5bc84d..d91972f780ba9 100644
--- a/libclc/generic/lib/math/logb.cl
+++ b/libclc/generic/lib/math/logb.cl
@@ -30,4 +30,10 @@ _CLC_OVERLOAD _CLC_DEF double logb(double x) {
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, logb, double)
 #endif
 
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 _CLC_DEFINE_UNARY_BUILTIN_FP16(logb)
+
+#endif
diff --git a/libclc/generic/lib/math/sin.cl b/libclc/generic/lib/math/sin.cl
index 30638a57f59b5..c271e67828066 100644
--- a/libclc/generic/lib/math/sin.cl
+++ b/libclc/generic/lib/math/sin.cl
@@ -78,4 +78,10 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sin, double);
 
 #endif
 
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 _CLC_DEFINE_UNARY_BUILTIN_FP16(sin)
+
+#endif
diff --git a/libclc/generic/lib/math/sinh.cl b/libclc/generic/lib/math/sinh.cl
index 3de0792361c2f..23500c1f49b7a 100644
--- a/libclc/generic/lib/math/sinh.cl
+++ b/libclc/generic/lib/math/sinh.cl
@@ -190,4 +190,10 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sinh, double)
 
 #endif
 
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 _CLC_DEFINE_UNARY_BUILTIN_FP16(sinh)
+
+#endif
diff --git a/libclc/generic/lib/math/sinpi.cl b/libclc/generic/lib/math/sinpi.cl
index 520bba5415c7c..01b340b855c44 100644
--- a/libclc/generic/lib/math/sinpi.cl
+++ b/libclc/generic/lib/math/sinpi.cl
@@ -130,4 +130,10 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sinpi, double)
 
 #endif
 
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 _CLC_DEFINE_UNARY_BUILTIN_FP16(sinpi)
+
+#endif
diff --git a/libclc/generic/lib/math/tanh.cl b/libclc/generic/lib/math/tanh.cl
index e558bb93a5596..d9509c57b0507 100644
--- a/libclc/generic/lib/math/tanh.cl
+++ b/libclc/generic/lib/math/tanh.cl
@@ -145,4 +145,10 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, tanh, double);
 
 #endif // cl_khr_fp64
 
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 _CLC_DEFINE_UNARY_BUILTIN_FP16(tanh)
+
+#endif
diff --git a/libclc/generic/lib/relational/bitselect.cl b/libclc/generic/lib/relational/bitselect.cl
index a470447f1fb91..04aae105b7f2c 100644
--- a/libclc/generic/lib/relational/bitselect.cl
+++ b/libclc/generic/lib/relational/bitselect.cl
@@ -21,34 +21,10 @@
  */
 
 #include <clc/clc.h>
-#include <clc/clcmacro.h>
 #include <clc/relational/clc_bitselect.h>
 
 #define __CLC_BODY <bitselect.inc>
 #include <clc/integer/gentype.inc>
-#undef __CLC_BODY
 
-#define FLOAT_BITSELECT(f_type, i_type, width)                                 \
-  _CLC_OVERLOAD _CLC_DEF f_type##width bitselect(                              \
-      f_type##width x, f_type##width y, f_type##width z) {                     \
-    return __clc_bitselect(x, y, z);                                           \
-  }
-
-FLOAT_BITSELECT(float, uint, )
-FLOAT_BITSELECT(float, uint, 2)
-FLOAT_BITSELECT(float, uint, 3)
-FLOAT_BITSELECT(float, uint, 4)
-FLOAT_BITSELECT(float, uint, 8)
-FLOAT_BITSELECT(float, uint, 16)
-
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-FLOAT_BITSELECT(double, ulong, )
-FLOAT_BITSELECT(double, ulong, 2)
-FLOAT_BITSELECT(double, ulong, 3)
-FLOAT_BITSELECT(double, ulong, 4)
-FLOAT_BITSELECT(double, ulong, 8)
-FLOAT_BITSELECT(double, ulong, 16)
-
-#endif
+#define __CLC_BODY <bitselect.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/relational/bitselect.inc b/libclc/generic/lib/relational/bitselect.inc
index 3a78a8c7b7487..b0d64bddffdfd 100644
--- a/libclc/generic/lib/relational/bitselect.inc
+++ b/libclc/generic/lib/relational/bitselect.inc
@@ -20,6 +20,7 @@
  * THE SOFTWARE.
  */
 
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE bitselect(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z) {
-  return ((x) ^ ((z) & ((y) ^ (x))));
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE bitselect(__CLC_GENTYPE x, __CLC_GENTYPE y,
+                                               __CLC_GENTYPE z) {
+  return __clc_bitselect(x, y, z);
 }
diff --git a/libclc/generic/lib/relational/select.cl b/libclc/generic/lib/relational/select.cl
index 094f4f9f29fa9..663f9d7ccf990 100644
--- a/libclc/generic/lib/relational/select.cl
+++ b/libclc/generic/lib/relational/select.cl
@@ -1,7 +1,11 @@
 #include <clc/clc.h>
+#include <clc/relational/clc_select.h>
 #include <clc/utils.h>
 
-#define __CLC_BODY <select.inc>
+#define __CLC_SELECT_FN select
+#define __CLC_SELECT_DEF(x, y, z) return __clc_select(x, y, z)
+
+#define __CLC_BODY <clc/relational/clc_select_impl.inc>
 #include <clc/math/gentype.inc>
-#define __CLC_BODY <select.inc>
+#define __CLC_BODY <clc/relational/clc_select_impl.inc>
 #include <clc/integer/gentype.inc>
diff --git a/libclc/generic/lib/relational/select.inc b/libclc/generic/lib/relational/select.inc
deleted file mode 100644
index 7691af2669186..0000000000000
--- a/libclc/generic/lib/relational/select.inc
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifdef __CLC_SCALAR
-#define __CLC_VECSIZE
-#endif
-
-#if __CLC_FPSIZE == 64
-#define __CLC_S_GENTYPE __CLC_XCONCAT(long, __CLC_VECSIZE)
-#define __CLC_U_GENTYPE __CLC_XCONCAT(ulong, __CLC_VECSIZE)
-#elif __CLC_FPSIZE == 32
-#define __CLC_S_GENTYPE __CLC_XCONCAT(int, __CLC_VECSIZE)
-#define __CLC_U_GENTYPE __CLC_XCONCAT(uint, __CLC_VECSIZE)
-#elif __CLC_FPSIZE == 16
-#define __CLC_S_GENTYPE __CLC_XCONCAT(short, __CLC_VECSIZE)
-#define __CLC_U_GENTYPE __CLC_XCONCAT(ushort, __CLC_VECSIZE)
-#endif
-
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE select(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_S_GENTYPE z)
-{
-	return z ? y : x;
-}
-
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE select(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_U_GENTYPE z)
-{
-	return z ? y : x;
-}
-
-#ifdef __CLC_FPSIZE
-#undef __CLC_S_GENTYPE
-#undef __CLC_U_GENTYPE
-#endif
-
-#ifdef __CLC_SCALAR
-#undef __CLC_VECSIZE
-#endif
diff --git a/libcxx/docs/CodingGuidelines.rst b/libcxx/docs/CodingGuidelines.rst
index 1bb62072e886d..9bf23a4cfe08b 100644
--- a/libcxx/docs/CodingGuidelines.rst
+++ b/libcxx/docs/CodingGuidelines.rst
@@ -184,3 +184,13 @@ headers (which is sometimes required for ``constexpr`` support).
 
 When defining a function at the ABI boundary, it can also be useful to consider which attributes (like ``[[gnu::pure]]``
 and ``[[clang::noescape]]``) can be added to the function to improve the compiler's ability to optimize.
+
+Library-internal type aliases should be annotated with ``_LIBCPP_NODEBUG``
+==========================================================================
+
+Libc++ has many internal type aliases. Taken together, they can produce a significant amount of debug information that
+users generally don't care about, since users rarely try to debug standard library facilities. For that reason, all
+library-internal type aliases that aren't function-local should be annotated with ``_LIBCPP_NODEBUG`` to prevent
+compilers from generating that debug information.
+
+This is enforced by the clang-tidy check ``libcpp-nodebug-on-aliases``.
diff --git a/libcxx/include/__memory/uninitialized_algorithms.h b/libcxx/include/__memory/uninitialized_algorithms.h
index 9587f01f64d58..a02a88399a7a7 100644
--- a/libcxx/include/__memory/uninitialized_algorithms.h
+++ b/libcxx/include/__memory/uninitialized_algorithms.h
@@ -585,9 +585,9 @@ __uninitialized_allocator_copy_impl(_Alloc&, _In* __first1, _In* __last1, _Out*
 template <class _Alloc, class _Iter1, class _Sent1, class _Iter2>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter2
 __uninitialized_allocator_copy(_Alloc& __alloc, _Iter1 __first1, _Sent1 __last1, _Iter2 __first2) {
-  auto __unwrapped_range = std::__unwrap_range(__first1, __last1);
+  auto __unwrapped_range = std::__unwrap_range(std::move(__first1), std::move(__last1));
   auto __result          = std::__uninitialized_allocator_copy_impl(
-      __alloc, __unwrapped_range.first, __unwrapped_range.second, std::__unwrap_iter(__first2));
+      __alloc, std::move(__unwrapped_range.first), std::move(__unwrapped_range.second), std::__unwrap_iter(__first2));
   return std::__rewrap_iter(__first2, __result);
 }
 
diff --git a/libcxx/include/__vector/vector.h b/libcxx/include/__vector/vector.h
index d94aca6788c87..66cb622e20963 100644
--- a/libcxx/include/__vector/vector.h
+++ b/libcxx/include/__vector/vector.h
@@ -10,11 +10,13 @@
 #define _LIBCPP___VECTOR_VECTOR_H
 
 #include <__algorithm/copy.h>
+#include <__algorithm/copy_n.h>
 #include <__algorithm/fill_n.h>
 #include <__algorithm/max.h>
 #include <__algorithm/min.h>
 #include <__algorithm/move.h>
 #include <__algorithm/move_backward.h>
+#include <__algorithm/ranges_copy_n.h>
 #include <__algorithm/rotate.h>
 #include <__assert>
 #include <__config>
@@ -23,6 +25,7 @@
 #include <__fwd/vector.h>
 #include <__iterator/advance.h>
 #include <__iterator/bounded_iter.h>
+#include <__iterator/concepts.h>
 #include <__iterator/distance.h>
 #include <__iterator/iterator_traits.h>
 #include <__iterator/move_iterator.h>
@@ -575,7 +578,7 @@ class _LIBCPP_TEMPLATE_VIS vector {
 
     if (__n > 0) {
       __vallocate(__n);
-      __construct_at_end(__first, __last, __n);
+      __construct_at_end(std::move(__first), std::move(__last), __n);
     }
 
     __guard.__complete();
@@ -595,9 +598,12 @@ class _LIBCPP_TEMPLATE_VIS vector {
   template <class _Iterator, class _Sentinel>
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __assign_with_sentinel(_Iterator __first, _Sentinel __last);
 
-  template <class _ForwardIterator, class _Sentinel>
+  // The `_Iterator` passed to the `*_with_size` functions may be a pure input iterator only when the call comes
+  // from `*_range` (since C++23). Otherwise, `_Iterator` is a forward iterator.
+
+  template <class _Iterator, class _Sentinel>
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
-  __assign_with_size(_ForwardIterator __first, _Sentinel __last, difference_type __n);
+  __assign_with_size(_Iterator __first, _Sentinel __last, difference_type __n);
 
   template <class _InputIterator, class _Sentinel>
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI iterator
@@ -918,7 +924,7 @@ template <class _InputIterator, class _Sentinel>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 void
 vector<_Tp, _Allocator>::__construct_at_end(_InputIterator __first, _Sentinel __last, size_type __n) {
   _ConstructTransaction __tx(*this, __n);
-  __tx.__pos_ = std::__uninitialized_allocator_copy(this->__alloc_, __first, __last, __tx.__pos_);
+  __tx.__pos_ = std::__uninitialized_allocator_copy(this->__alloc_, std::move(__first), std::move(__last), __tx.__pos_);
 }
 
 //  Default constructs __n objects starting at __end_
@@ -1027,23 +1033,28 @@ vector<_Tp, _Allocator>::__assign_with_sentinel(_Iterator __first, _Sentinel __l
 }
 
 template <class _Tp, class _Allocator>
-template <class _ForwardIterator, class _Sentinel>
+template <class _Iterator, class _Sentinel>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
-vector<_Tp, _Allocator>::__assign_with_size(_ForwardIterator __first, _Sentinel __last, difference_type __n) {
+vector<_Tp, _Allocator>::__assign_with_size(_Iterator __first, _Sentinel __last, difference_type __n) {
   size_type __new_size = static_cast<size_type>(__n);
   if (__new_size <= capacity()) {
     if (__new_size > size()) {
-      _ForwardIterator __mid = std::next(__first, size());
+#if _LIBCPP_STD_VER >= 23
+      auto __mid = ranges::copy_n(std::move(__first), size(), this->__begin_).in;
+      __construct_at_end(std::move(__mid), std::move(__last), __new_size - size());
+#else
+      _Iterator __mid = std::next(__first, size());
       std::copy(__first, __mid, this->__begin_);
       __construct_at_end(__mid, __last, __new_size - size());
+#endif
     } else {
-      pointer __m = std::__copy(__first, __last, this->__begin_).second;
+      pointer __m = std::__copy(std::move(__first), __last, this->__begin_).second;
       this->__destruct_at_end(__m);
     }
   } else {
     __vdeallocate();
     __vallocate(__recommend(__new_size));
-    __construct_at_end(__first, __last, __new_size);
+    __construct_at_end(std::move(__first), std::move(__last), __new_size);
   }
 }
 
@@ -1293,28 +1304,40 @@ template <class _Iterator, class _Sentinel>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI typename vector<_Tp, _Allocator>::iterator
 vector<_Tp, _Allocator>::__insert_with_size(
     const_iterator __position, _Iterator __first, _Sentinel __last, difference_type __n) {
-  auto __insertion_size = __n;
-  pointer __p           = this->__begin_ + (__position - begin());
+  pointer __p = this->__begin_ + (__position - begin());
   if (__n > 0) {
     if (__n <= this->__cap_ - this->__end_) {
-      size_type __old_n    = __n;
       pointer __old_last   = this->__end_;
-      _Iterator __m        = std::next(__first, __n);
       difference_type __dx = this->__end_ - __p;
       if (__n > __dx) {
-        __m                    = __first;
-        difference_type __diff = this->__end_ - __p;
-        std::advance(__m, __diff);
-        __construct_at_end(__m, __last, __n - __diff);
-        __n = __dx;
-      }
-      if (__n > 0) {
-        __move_range(__p, __old_last, __p + __old_n);
-        std::copy(__first, __m, __p);
+#if _LIBCPP_STD_VER >= 23
+        if constexpr (!forward_iterator<_Iterator>) {
+          __construct_at_end(std::move(__first), std::move(__last), __n);
+          std::rotate(__p, __old_last, this->__end_);
+        } else
+#endif
+        {
+          _Iterator __m = std::next(__first, __dx);
+          __construct_at_end(__m, __last, __n - __dx);
+          if (__dx > 0) {
+            __move_range(__p, __old_last, __p + __n);
+            std::copy(__first, __m, __p);
+          }
+        }
+      } else {
+        __move_range(__p, __old_last, __p + __n);
+#if _LIBCPP_STD_VER >= 23
+        if constexpr (!forward_iterator<_Iterator>) {
+          ranges::copy_n(std::move(__first), __n, __p);
+        } else
+#endif
+        {
+          std::copy_n(__first, __n, __p);
+        }
       }
     } else {
       __split_buffer<value_type, allocator_type&> __v(__recommend(size() + __n), __p - this->__begin_, this->__alloc_);
-      __v.__construct_at_end_with_size(__first, __insertion_size);
+      __v.__construct_at_end_with_size(std::move(__first), __n);
       __p = __swap_out_circular_buffer(__v, __p);
     }
   }
diff --git a/libcxx/include/__vector/vector_bool.h b/libcxx/include/__vector/vector_bool.h
index 2b721e00058bc..4f1c442ce0be8 100644
--- a/libcxx/include/__vector/vector_bool.h
+++ b/libcxx/include/__vector/vector_bool.h
@@ -420,9 +420,12 @@ class _LIBCPP_TEMPLATE_VIS vector<bool, _Allocator> {
   template <class _Iterator, class _Sentinel>
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __assign_with_sentinel(_Iterator __first, _Sentinel __last);
 
-  template <class _ForwardIterator, class _Sentinel>
+  // The `_Iterator` passed to the `*_with_size` functions may be a pure input iterator only when the call comes
+  // from `*_range` (since C++23). Otherwise, `_Iterator` is a forward iterator.
+
+  template <class _Iterator, class _Sentinel>
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
-  __assign_with_size(_ForwardIterator __first, _Sentinel __last, difference_type __ns);
+  __assign_with_size(_Iterator __first, _Sentinel __last, difference_type __ns);
 
   template <class _InputIterator, class _Sentinel>
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI iterator
@@ -578,7 +581,7 @@ vector<bool, _Allocator>::__construct_at_end(_InputIterator __first, _Sentinel _
     else
       this->__begin_[(this->__size_ - 1) / __bits_per_word] = __storage_type(0);
   }
-  std::__copy(__first, __last, __make_iter(__old_size));
+  std::__copy(std::move(__first), std::move(__last), __make_iter(__old_size));
 }
 
 template <class _Allocator>
@@ -828,9 +831,9 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void vector<bool, _Allocator>::assign(_ForwardIter
 }
 
 template <class _Allocator>
-template <class _ForwardIterator, class _Sentinel>
+template <class _Iterator, class _Sentinel>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
-vector<bool, _Allocator>::__assign_with_size(_ForwardIterator __first, _Sentinel __last, difference_type __ns) {
+vector<bool, _Allocator>::__assign_with_size(_Iterator __first, _Sentinel __last, difference_type __ns) {
   _LIBCPP_ASSERT_VALID_INPUT_RANGE(__ns >= 0, "invalid range specified");
 
   clear();
@@ -841,7 +844,7 @@ vector<bool, _Allocator>::__assign_with_size(_ForwardIterator __first, _Sentinel
       __vdeallocate();
       __vallocate(__n);
     }
-    __construct_at_end(__first, __last, __n);
+    __construct_at_end(std::move(__first), std::move(__last), __n);
   }
 }
 
@@ -859,11 +862,13 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void vector<bool, _Allocator>::reserve(size_type _
 
 template <class _Allocator>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 void vector<bool, _Allocator>::shrink_to_fit() _NOEXCEPT {
-  if (__external_cap_to_internal(size()) > __cap_) {
+  if (__external_cap_to_internal(size()) < __cap_) {
 #if _LIBCPP_HAS_EXCEPTIONS
     try {
 #endif // _LIBCPP_HAS_EXCEPTIONS
-      vector(*this, allocator_type(__alloc_)).swap(*this);
+      vector __v(*this, allocator_type(__alloc_));
+      if (__v.__cap_ < __cap_)
+        __v.swap(*this);
 #if _LIBCPP_HAS_EXCEPTIONS
     } catch (...) {
     }
@@ -986,10 +991,10 @@ vector<bool, _Allocator>::insert(const_iterator __position, _ForwardIterator __f
 }
 
 template <class _Allocator>
-template <class _ForwardIterator, class _Sentinel>
+template <class _Iterator, class _Sentinel>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI typename vector<bool, _Allocator>::iterator
 vector<bool, _Allocator>::__insert_with_size(
-    const_iterator __position, _ForwardIterator __first, _Sentinel __last, difference_type __n_signed) {
+    const_iterator __position, _Iterator __first, _Sentinel __last, difference_type __n_signed) {
   _LIBCPP_ASSERT_VALID_INPUT_RANGE(__n_signed >= 0, "invalid range specified");
   const size_type __n = static_cast<size_type>(__n_signed);
   iterator __r;
@@ -1007,7 +1012,7 @@ vector<bool, _Allocator>::__insert_with_size(
     std::copy_backward(__position, cend(), __v.end());
     swap(__v);
   }
-  std::__copy(__first, __last, __r);
+  std::__copy(std::move(__first), std::move(__last), __r);
   return __r;
 }
 
diff --git a/libcxx/include/string b/libcxx/include/string
index 39982d5670bdb..fdd8085106dcc 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -1919,7 +1919,8 @@ private:
   __copy_non_overlapping_range(_ForwardIter __first, _Sent __last, value_type* __dest) {
 #  ifndef _LIBCPP_CXX03_LANG
     if constexpr (__libcpp_is_contiguous_iterator<_ForwardIter>::value &&
-                  is_same<value_type, __iter_value_type<_ForwardIter>>::value && is_same<_ForwardIter, _Sent>::value) {
+                  is_same<value_type, __remove_cvref_t<decltype(*__first)>>::value &&
+                  is_same<_ForwardIter, _Sent>::value) {
       _LIBCPP_ASSERT_INTERNAL(
           !std::__is_overlapping_range(std::__to_address(__first), std::__to_address(__last), __dest),
           "__copy_non_overlapping_range called with an overlapping range!");
@@ -1952,7 +1953,7 @@ private:
     __sz += __n;
     __set_size(__sz);
     traits_type::assign(__p[__sz], value_type());
-    __copy_non_overlapping_range(__first, __last, __p + __ip);
+    __copy_non_overlapping_range(std::move(__first), std::move(__last), __p + __ip);
 
     return begin() + __ip;
   }
@@ -2488,7 +2489,7 @@ basic_string<_CharT, _Traits, _Allocator>::__init_with_size(_InputIterator __fir
 #  if _LIBCPP_HAS_EXCEPTIONS
   try {
 #  endif // _LIBCPP_HAS_EXCEPTIONS
-    auto __end = __copy_non_overlapping_range(__first, __last, std::__to_address(__p));
+    auto __end = __copy_non_overlapping_range(std::move(__first), std::move(__last), std::__to_address(__p));
     traits_type::assign(*__end, value_type());
 #  if _LIBCPP_HAS_EXCEPTIONS
   } catch (...) {
@@ -3080,9 +3081,9 @@ basic_string<_CharT, _Traits, _Allocator>::__insert_with_size(
     return begin() + __ip;
 
   if (__string_is_trivial_iterator<_Iterator>::value && !__addr_in_range(*__first)) {
-    return __insert_from_safe_copy(__n, __ip, __first, __last);
+    return __insert_from_safe_copy(__n, __ip, std::move(__first), std::move(__last));
   } else {
-    const basic_string __temp(__init_with_sentinel_tag(), __first, __last, __alloc_);
+    const basic_string __temp(__init_with_sentinel_tag(), std::move(__first), std::move(__last), __alloc_);
     return __insert_from_safe_copy(__n, __ip, __temp.begin(), __temp.end());
   }
 }
diff --git a/libcxx/include/tuple b/libcxx/include/tuple
index 0c96786ae6d02..5d968bfd4015a 100644
--- a/libcxx/include/tuple
+++ b/libcxx/include/tuple
@@ -257,6 +257,7 @@ template <class... Types>
 #  include <__type_traits/maybe_const.h>
 #  include <__type_traits/nat.h>
 #  include <__type_traits/negation.h>
+#  include <__type_traits/remove_cv.h>
 #  include <__type_traits/remove_cvref.h>
 #  include <__type_traits/remove_reference.h>
 #  include <__type_traits/unwrap_ref.h>
@@ -390,7 +391,7 @@ public:
 };
 
 template <size_t _Ip, class _Hp>
-class __tuple_leaf<_Ip, _Hp, true> : private _Hp {
+class __tuple_leaf<_Ip, _Hp, true> : private __remove_cv_t<_Hp> {
 public:
   _LIBCPP_CONSTEXPR_SINCE_CXX14 __tuple_leaf& operator=(const __tuple_leaf&) = delete;
 
diff --git a/libcxx/test/std/containers/sequences/vector.bool/assign_range.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/assign_range.pass.cpp
index e5d0454a844d5..a8cbb32c2655e 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/assign_range.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/assign_range.pass.cpp
@@ -12,6 +12,7 @@
 // template<container-compatible-range<bool> R>
 //   constexpr void assign_range(R&& rg); // C++23
 
+#include <sstream>
 #include <vector>
 
 #include "../insert_range_sequence_containers.h"
@@ -49,11 +50,29 @@ constexpr bool test() {
       v.assign_range(in);
       assert(std::ranges::equal(v, in));
     }
+
+    { // Ensure input-only sized ranges are accepted.
+      using input_iter = cpp20_input_iterator<const bool*>;
+      const bool in[]{true, true, false, true};
+      std::vector<bool> v;
+      v.assign_range(std::views::counted(input_iter{std::ranges::begin(in)}, std::ranges::ssize(in)));
+      assert(std::ranges::equal(v, std::vector<bool>{true, true, false, true}));
+    }
   }
 
   return true;
 }
 
+#ifndef TEST_HAS_NO_LOCALIZATION
+void test_counted_istream_view() { // assign_range() from a counted view over an istream view.
+  std::istringstream is{"1 1 0 1"};
+  auto vals = std::views::istream<bool>(is);
+  std::vector<bool> v;
+  v.assign_range(std::views::counted(vals.begin(), 3)); // Ask for three of the four values in the stream.
+  assert(v == (std::vector{true, true, false}));
+}
+#endif
+
 int main(int, char**) {
   test();
   static_assert(test());
@@ -61,5 +80,9 @@ int main(int, char**) {
   // Note: `test_assign_range_exception_safety_throwing_copy` doesn't apply because copying booleans cannot throw.
   test_assign_range_exception_safety_throwing_allocator<std::vector, bool>();
 
+#ifndef TEST_HAS_NO_LOCALIZATION
+  test_counted_istream_view();
+#endif
+
   return 0;
 }
diff --git a/libcxx/test/std/containers/sequences/vector.bool/construct_from_range.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/construct_from_range.pass.cpp
index 03f3100b92883..055f2378125d9 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/construct_from_range.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/construct_from_range.pass.cpp
@@ -8,6 +8,7 @@
 
 // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
 
+#include <sstream>
 #include <vector>
 
 #include "../from_range_sequence_containers.h"
@@ -24,9 +25,25 @@ constexpr bool test() {
     });
   });
 
+  { // Ensure input-only sized ranges are accepted.
+    using input_iter = cpp20_input_iterator<const bool*>;
+    const bool in[]{true, true, false, true};
+    std::vector v(std::from_range, std::views::counted(input_iter{std::ranges::begin(in)}, std::ranges::ssize(in)));
+    assert(std::ranges::equal(v, std::vector<bool>{true, true, false, true}));
+  }
+
   return true;
 }
 
+#ifndef TEST_HAS_NO_LOCALIZATION
+void test_counted_istream_view() { // from_range construction from a counted view over an istream view.
+  std::istringstream is{"1 1 0 1"};
+  auto vals = std::views::istream<bool>(is);
+  std::vector v(std::from_range, std::views::counted(vals.begin(), 3)); // Three of the four values in the stream.
+  assert(v == (std::vector{true, true, false}));
+}
+#endif
+
 int main(int, char**) {
   test();
   static_assert(test());
@@ -36,5 +53,9 @@ int main(int, char**) {
   // Note: test_exception_safety_throwing_copy doesn't apply because copying a boolean cannot throw.
   test_exception_safety_throwing_allocator<std::vector, bool>();
 
+#ifndef TEST_HAS_NO_LOCALIZATION
+  test_counted_istream_view();
+#endif
+
   return 0;
 }
diff --git a/libcxx/test/std/containers/sequences/vector.bool/insert_range.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/insert_range.pass.cpp
index 65d085fa1f083..d8f19d47e9942 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/insert_range.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/insert_range.pass.cpp
@@ -12,6 +12,7 @@
 // template<container-compatible-range<bool> R>
 //   constexpr iterator insert_range(const_iterator position, R&& rg); // C++23
 
+#include <sstream>
 #include <vector>
 
 #include "../insert_range_sequence_containers.h"
@@ -56,11 +57,29 @@ constexpr bool test() {
       v.insert_range(v.end(), in);
       assert(std::ranges::equal(v, std::vector<bool>{0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1}));
     }
+
+    { // Ensure input-only sized ranges are accepted.
+      using input_iter = cpp20_input_iterator<const bool*>;
+      const bool in[]{true, true, false, true};
+      std::vector<bool> v{true, false};
+      v.insert_range(v.begin(), std::views::counted(input_iter{std::ranges::begin(in)}, std::ranges::ssize(in)));
+      assert(std::ranges::equal(v, std::vector<bool>{true, true, false, true, true, false}));
+    }
   }
 
   return true;
 }
 
+#ifndef TEST_HAS_NO_LOCALIZATION
+void test_counted_istream_view() { // insert_range() from a counted view over an istream view.
+  std::istringstream is{"1 1 0 1"};
+  auto vals = std::views::istream<bool>(is);
+  std::vector<bool> v;
+  v.insert_range(v.end(), std::views::counted(vals.begin(), 3)); // Insert three of the four values into the empty v.
+  assert(v == (std::vector{true, true, false}));
+}
+#endif
+
 int main(int, char**) {
   test();
   static_assert(test());
@@ -68,5 +87,9 @@ int main(int, char**) {
   // Note: `test_insert_range_exception_safety_throwing_copy` doesn't apply because copying booleans cannot throw.
   test_insert_range_exception_safety_throwing_allocator<std::vector, bool>();
 
+#ifndef TEST_HAS_NO_LOCALIZATION
+  test_counted_istream_view();
+#endif
+
   return 0;
 }
diff --git a/libcxx/test/std/containers/sequences/vector.bool/shrink_to_fit.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/shrink_to_fit.pass.cpp
index 3bc639d0479f9..30efe047054ab 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/shrink_to_fit.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/shrink_to_fit.pass.cpp
@@ -11,7 +11,10 @@
 
 // void shrink_to_fit();
 
+// XFAIL: FROZEN-CXX03-HEADERS-FIXME
+
 #include <cassert>
+#include <climits>
 #include <vector>
 
 #include "increasing_allocator.h"
@@ -20,19 +23,56 @@
 
 TEST_CONSTEXPR_CXX20 bool tests() {
   {
-    std::vector<bool> v(100);
+    using C = std::vector<bool>;
+    C v(100);
     v.push_back(1);
+    C::size_type before_cap = v.capacity();
+    v.clear();
     v.shrink_to_fit();
-    assert(v.capacity() >= 101);
-    assert(v.size() >= 101);
+    assert(v.capacity() <= before_cap);
+    LIBCPP_ASSERT(v.capacity() == 0); // libc++ honors the shrink_to_fit request as a QOI matter
+    assert(v.size() == 0);
   }
-#if TEST_STD_VER >= 11
   {
-    std::vector<bool, min_allocator<bool>> v(100);
+    using C = std::vector<bool, min_allocator<bool> >;
+    C v(100);
     v.push_back(1);
+    C::size_type before_cap = v.capacity();
     v.shrink_to_fit();
     assert(v.capacity() >= 101);
-    assert(v.size() >= 101);
+    assert(v.capacity() <= before_cap);
+    assert(v.size() == 101);
+    v.erase(v.begin() + 1, v.end());
+    v.shrink_to_fit();
+    assert(v.capacity() <= before_cap);
+    LIBCPP_ASSERT(v.capacity() == C(1).capacity()); // libc++ honors the shrink_to_fit request as a QOI matter.
+    assert(v.size() == 1);
+  }
+
+#if defined(_LIBCPP_VERSION)
+  {
+    using C                = std::vector<bool>;
+    unsigned bits_per_word = static_cast<unsigned>(sizeof(C::__storage_type) * CHAR_BIT);
+    C v(bits_per_word);
+    v.push_back(1);
+    assert(v.capacity() == bits_per_word * 2);
+    assert(v.size() == bits_per_word + 1);
+    v.pop_back();
+    v.shrink_to_fit();
+    assert(v.capacity() == bits_per_word);
+    assert(v.size() == bits_per_word);
+  }
+  {
+    using C                = std::vector<bool>;
+    unsigned bits_per_word = static_cast<unsigned>(sizeof(C::__storage_type) * CHAR_BIT);
+    C v;
+    v.reserve(bits_per_word * 2);
+    v.push_back(1);
+    assert(v.capacity() == bits_per_word * 2);
+    assert(v.size() == 1);
+    v.shrink_to_fit();
+    assert(v.capacity() == bits_per_word);
+    assert(v.size() == 1);
   }
 #endif
 
diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/construct_from_range.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/construct_from_range.pass.cpp
index 5fb2b46f7e942..501abf396391f 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.cons/construct_from_range.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.cons/construct_from_range.pass.cpp
@@ -11,6 +11,7 @@
 // template<container-compatible-range<T> R>
 //   vector(from_range_t, R&& rg, const Allocator& = Allocator()); // C++23
 
+#include <sstream>
 #include <vector>
 
 #include "../../from_range_sequence_containers.h"
@@ -26,9 +27,25 @@ constexpr bool test() {
   });
   test_sequence_container_move_only<std::vector>();
 
+  { // Ensure input-only sized ranges are accepted.
+    using input_iter = cpp20_input_iterator<const int*>;
+    const int in[]{1, 2, 3, 4};
+    std::vector v(std::from_range, std::views::counted(input_iter{std::ranges::begin(in)}, std::ranges::ssize(in)));
+    assert(std::ranges::equal(v, std::vector<int>{1, 2, 3, 4}));
+  }
+
   return true;
 }
 
+#ifndef TEST_HAS_NO_LOCALIZATION
+void test_counted_istream_view() { // from_range construction from a counted view over an istream view.
+  std::istringstream is{"1 2 3 4"};
+  auto vals = std::views::istream<int>(is);
+  std::vector v(std::from_range, std::views::counted(vals.begin(), 3)); // Three of the four values in the stream.
+  assert(v == (std::vector{1, 2, 3}));
+}
+#endif
+
 int main(int, char**) {
   static_assert(test_constraints<std::vector, int, double>());
   test();
@@ -38,5 +55,9 @@ int main(int, char**) {
   test_exception_safety_throwing_copy<std::vector>();
   test_exception_safety_throwing_allocator<std::vector, int>();
 
+#ifndef TEST_HAS_NO_LOCALIZATION
+  test_counted_istream_view();
+#endif
+
   return 0;
 }
diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/assign_range.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.modifiers/assign_range.pass.cpp
index 8ab3dc10aed99..6326ffca96966 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.modifiers/assign_range.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.modifiers/assign_range.pass.cpp
@@ -12,6 +12,7 @@
 // template<container-compatible-range<T> R>
 //   constexpr void assign_range(R&& rg); // C++23
 
+#include <sstream>
 #include <vector>
 
 #include "../../insert_range_sequence_containers.h"
@@ -62,11 +63,29 @@ constexpr bool test() {
       v.assign_range(in);
       assert(std::ranges::equal(v, in));
     }
+
+    { // Ensure input-only sized ranges are accepted.
+      using input_iter = cpp20_input_iterator<const int*>;
+      const int in[]{1, 2, 3, 4};
+      std::vector<int> v;
+      v.assign_range(std::views::counted(input_iter{std::ranges::begin(in)}, std::ranges::ssize(in)));
+      assert(std::ranges::equal(v, std::vector<int>{1, 2, 3, 4}));
+    }
   }
 
   return true;
 }
 
+#ifndef TEST_HAS_NO_LOCALIZATION
+void test_counted_istream_view() { // assign_range() from a counted view over an istream view.
+  std::istringstream is{"1 2 3 4"};
+  auto vals = std::views::istream<int>(is);
+  std::vector<int> v;
+  v.assign_range(std::views::counted(vals.begin(), 3)); // Ask for three of the four values in the stream.
+  assert(v == (std::vector{1, 2, 3}));
+}
+#endif
+
 int main(int, char**) {
   test();
   static_assert(test());
@@ -74,5 +93,9 @@ int main(int, char**) {
   test_assign_range_exception_safety_throwing_copy<std::vector>();
   test_assign_range_exception_safety_throwing_allocator<std::vector, int>();
 
+#ifndef TEST_HAS_NO_LOCALIZATION
+  test_counted_istream_view();
+#endif
+
   return 0;
 }
diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_range.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_range.pass.cpp
index 0e26cb1546277..25ffc33aad424 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_range.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_range.pass.cpp
@@ -12,6 +12,7 @@
 // template<container-compatible-range<T> R>
 //   constexpr iterator insert_range(const_iterator position, R&& rg); // C++23
 
+#include <sstream>
 #include <vector>
 
 #include "../../insert_range_sequence_containers.h"
@@ -53,11 +54,29 @@ constexpr bool test() {
       v.insert_range(v.end(), in);
       assert(std::ranges::equal(v, std::array{1, 2, 3, 4, 5, 6, 7, 8, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10}));
     }
+
+    { // Ensure input-only sized ranges are accepted.
+      using input_iter = cpp20_input_iterator<const int*>;
+      const int in[]{1, 2, 3, 4};
+      std::vector<int> v{-5, -6};
+      v.insert_range(v.begin(), std::views::counted(input_iter{std::ranges::begin(in)}, std::ranges::ssize(in)));
+      assert(std::ranges::equal(v, std::vector<int>{1, 2, 3, 4, -5, -6}));
+    }
   }
 
   return true;
 }
 
+#ifndef TEST_HAS_NO_LOCALIZATION
+void test_counted_istream_view() { // insert_range() from a counted view over an istream view.
+  std::istringstream is{"1 2 3 4"};
+  auto vals = std::views::istream<int>(is);
+  std::vector<int> v;
+  v.insert_range(v.end(), std::views::counted(vals.begin(), 3)); // Insert three of the four values into the empty v.
+  assert(v == (std::vector{1, 2, 3}));
+}
+#endif
+
 int main(int, char**) {
   test();
   static_assert(test());
@@ -67,5 +86,9 @@ int main(int, char**) {
   test_insert_range_exception_safety_throwing_copy<std::vector>();
   test_insert_range_exception_safety_throwing_allocator<std::vector, int>();
 
+#ifndef TEST_HAS_NO_LOCALIZATION
+  test_counted_istream_view();
+#endif
+
   return 0;
 }
diff --git a/libcxx/test/std/strings/basic.string/string.cons/from_range.pass.cpp b/libcxx/test/std/strings/basic.string/string.cons/from_range.pass.cpp
index 6a1b45b25ef03..c110a59d606ff 100644
--- a/libcxx/test/std/strings/basic.string/string.cons/from_range.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.cons/from_range.pass.cpp
@@ -12,6 +12,7 @@
 //   constexpr basic_string(from_range_t, R&& rg, const Allocator& a = Allocator());           // since C++23
 
 #include <algorithm>
+#include <sstream>
 #include <string>
 #include <utility>
 #include <vector>
@@ -82,6 +83,13 @@ constexpr void test_with_input(std::vector<char> input) {
     assert(std::ranges::equal(input, c));
     LIBCPP_ASSERT(is_string_asan_correct(c));
   }
+
+  { // Ensure input-only sized ranges are accepted.
+    using input_iter = cpp20_input_iterator<const char*>;
+    const char in[]{'q', 'w', 'e', 'r'};
+    std::string s(std::from_range, std::views::counted(input_iter{std::ranges::begin(in)}, std::ranges::ssize(in)));
+    assert(s == "qwer");
+  }
 }
 
 void test_string_exception_safety_throwing_allocator() {
@@ -116,6 +124,15 @@ constexpr bool test_inputs() {
   return true;
 }
 
+#ifndef TEST_HAS_NO_LOCALIZATION
+void test_counted_istream_view() { // from_range construction from a counted view over an istream view.
+  std::istringstream is{"qwert"};
+  auto vals = std::views::istream<char>(is);
+  std::string s(std::from_range, std::views::counted(vals.begin(), 3)); // Three of the five characters in the stream.
+  assert(s == "qwe");
+}
+#endif
+
 int main(int, char**) {
   test_inputs();
   static_assert(test_inputs());
@@ -125,5 +142,9 @@ int main(int, char**) {
   // Note: `test_exception_safety_throwing_copy` doesn't apply because copying a `char` cannot throw.
   test_string_exception_safety_throwing_allocator();
 
+#ifndef TEST_HAS_NO_LOCALIZATION
+  test_counted_istream_view();
+#endif
+
   return 0;
 }
diff --git a/libcxx/test/std/strings/basic.string/string.modifiers/string_insert/insert_range.pass.cpp b/libcxx/test/std/strings/basic.string/string.modifiers/string_insert/insert_range.pass.cpp
index 45d1f620e9054..691cd0e19fa3a 100644
--- a/libcxx/test/std/strings/basic.string/string.modifiers/string_insert/insert_range.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.modifiers/string_insert/insert_range.pass.cpp
@@ -11,6 +11,7 @@
 // template<container-compatible-range<charT> R>
 //   constexpr iterator insert_range(const_iterator p, R&& rg);                                // C++23
 
+#include <sstream>
 #include <string>
 
 #include "../../../../containers/sequences/insert_range_sequence_containers.h"
@@ -27,9 +28,27 @@ constexpr bool test_constexpr() {
         []([[maybe_unused]] auto&& c) { LIBCPP_ASSERT(c.__invariants()); });
   });
 
+  { // Ensure input-only sized ranges are accepted.
+    using input_iter = cpp20_input_iterator<const char*>;
+    const char in[]{'q', 'w', 'e', 'r'};
+    std::string s = "zxcv";
+    s.insert_range(s.begin(), std::views::counted(input_iter{std::ranges::begin(in)}, std::ranges::ssize(in)));
+    assert(s == "qwerzxcv");
+  }
+
   return true;
 }
 
+#ifndef TEST_HAS_NO_LOCALIZATION
+void test_counted_istream_view() { // insert_range() from a counted view over an istream view.
+  std::istringstream is{"qwert"};
+  auto vals     = std::views::istream<char>(is);
+  std::string s = "zxcv";
+  s.insert_range(s.begin(), std::views::counted(vals.begin(), 3)); // Prepend three of the five streamed characters.
+  assert(s == "qwezxcv");
+}
+#endif
+
 int main(int, char**) {
   static_assert(test_constraints_insert_range<std::basic_string, char, int>());
 
@@ -39,6 +58,10 @@ int main(int, char**) {
   });
   static_assert(test_constexpr());
 
+#ifndef TEST_HAS_NO_LOCALIZATION
+  test_counted_istream_view();
+#endif
+
   // Note: `test_insert_range_exception_safety_throwing_copy` doesn't apply because copying chars cannot throw.
   {
 #if !defined(TEST_HAS_NO_EXCEPTIONS)
diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index ff3c89884c24d..11e7cf4346b23 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -1167,7 +1167,7 @@ uint32_t ImportThunkChunkARM64EC::extendRanges() {
 }
 
 uint64_t Arm64XRelocVal::get() const {
-  return (sym ? sym->getRVA() : 0) + value;
+  return (sym ? sym->getRVA() : 0) + (chunk ? chunk->getRVA() : 0) + value;
 }
 
 size_t Arm64XDynamicRelocEntry::getSize() const {
@@ -1230,6 +1230,17 @@ void DynamicRelocsChunk::finalize() {
   size = alignTo(size, sizeof(uint32_t));
 }
 
+// Set the reloc value. The reloc entry must be allocated beforehand.
+void DynamicRelocsChunk::set(uint32_t rva, Arm64XRelocVal value) {
+  auto entry =
+      llvm::find_if(arm64xRelocs, [rva](const Arm64XDynamicRelocEntry &e) {
+        return e.offset.get() == rva;
+      });
+  assert(entry != arm64xRelocs.end()); // An entry for rva must already exist.
+  assert(!entry->value.get());         // The value must still be zero (unset).
+  entry->value = value;
+}
+
 void DynamicRelocsChunk::writeTo(uint8_t *buf) const {
   auto table = reinterpret_cast<coff_dynamic_reloc_table *>(buf);
   table->Version = 1;
diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h
index 7ba58e336451f..d6216efdd90bd 100644
--- a/lld/COFF/Chunks.h
+++ b/lld/COFF/Chunks.h
@@ -840,10 +840,13 @@ class Arm64XRelocVal {
 public:
   Arm64XRelocVal(uint64_t value = 0) : value(value) {}
   Arm64XRelocVal(Defined *sym, int32_t offset = 0) : sym(sym), value(offset) {}
+  Arm64XRelocVal(Chunk *chunk, int32_t offset = 0)
+      : chunk(chunk), value(offset) {}
   uint64_t get() const;
 
 private:
   Defined *sym = nullptr;
+  Chunk *chunk = nullptr;
   uint64_t value;
 };
 
@@ -874,10 +877,12 @@ class DynamicRelocsChunk : public NonSectionChunk {
   void finalize();
 
   void add(llvm::COFF::Arm64XFixupType type, uint8_t size,
-           Arm64XRelocVal offset, Arm64XRelocVal value) {
+           Arm64XRelocVal offset, Arm64XRelocVal value = Arm64XRelocVal()) {
     arm64xRelocs.emplace_back(type, size, offset, value);
   }
 
+  void set(uint32_t rva, Arm64XRelocVal value);
+
 private:
   std::vector<Arm64XDynamicRelocEntry> arm64xRelocs;
   size_t size;
diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h
index b08427f738bb4..cd280aa09964d 100644
--- a/lld/COFF/Config.h
+++ b/lld/COFF/Config.h
@@ -162,8 +162,6 @@ struct Configuration {
   bool dll = false;
   StringRef implib;
   bool noimplib = false;
-  std::vector<Export> exports;
-  bool hadExplicitExports;
   std::set<std::string> delayLoads;
   std::map<std::string, int> dllOrder;
   Symbol *delayLoadHelper = nullptr;
diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp
index 3e9f3f4718386..6a3f8eb21e847 100644
--- a/lld/COFF/DLL.cpp
+++ b/lld/COFF/DLL.cpp
@@ -639,22 +639,22 @@ class ExportDirectoryChunk : public NonSectionChunk {
 
 class AddressTableChunk : public NonSectionChunk {
 public:
-  explicit AddressTableChunk(COFFLinkerContext &ctx, size_t baseOrdinal,
+  explicit AddressTableChunk(SymbolTable &symtab, size_t baseOrdinal,
                              size_t maxOrdinal)
       : baseOrdinal(baseOrdinal), size((maxOrdinal - baseOrdinal) + 1),
-        ctx(ctx) {}
+        symtab(symtab) {}
   size_t getSize() const override { return size * 4; }
 
   void writeTo(uint8_t *buf) const override {
     memset(buf, 0, getSize());
 
-    for (const Export &e : ctx.config.exports) {
+    for (const Export &e : symtab.exports) {
       assert(e.ordinal >= baseOrdinal && "Export symbol has invalid ordinal");
       // Subtract the OrdinalBase to get the index.
       uint8_t *p = buf + (e.ordinal - baseOrdinal) * 4;
       uint32_t bit = 0;
       // Pointer to thumb code must have the LSB set, so adjust it.
-      if (ctx.config.machine == ARMNT && !e.data)
+      if (symtab.machine == ARMNT && !e.data)
         bit = 1;
       if (e.forwardChunk) {
         write32le(p, e.forwardChunk->getRVA() | bit);
@@ -669,7 +669,7 @@ class AddressTableChunk : public NonSectionChunk {
 private:
   size_t baseOrdinal;
   size_t size;
-  const COFFLinkerContext &ctx;
+  const SymbolTable &symtab;
 };
 
 class NamePointersChunk : public NonSectionChunk {
@@ -690,13 +690,13 @@ class NamePointersChunk : public NonSectionChunk {
 
 class ExportOrdinalChunk : public NonSectionChunk {
 public:
-  explicit ExportOrdinalChunk(const COFFLinkerContext &ctx, size_t baseOrdinal,
+  explicit ExportOrdinalChunk(const SymbolTable &symtab, size_t baseOrdinal,
                               size_t tableSize)
-      : baseOrdinal(baseOrdinal), size(tableSize), ctx(ctx) {}
+      : baseOrdinal(baseOrdinal), size(tableSize), symtab(symtab) {}
   size_t getSize() const override { return size * 2; }
 
   void writeTo(uint8_t *buf) const override {
-    for (const Export &e : ctx.config.exports) {
+    for (const Export &e : symtab.exports) {
       if (e.noname)
         continue;
       assert(e.ordinal >= baseOrdinal && "Export symbol has invalid ordinal");
@@ -709,7 +709,7 @@ class ExportOrdinalChunk : public NonSectionChunk {
 private:
   size_t baseOrdinal;
   size_t size;
-  const COFFLinkerContext &ctx;
+  const SymbolTable &symtab;
 };
 
 } // anonymous namespace
@@ -920,9 +920,9 @@ Chunk *DelayLoadContents::newThunkChunk(DefinedImportData *s,
   }
 }
 
-void createEdataChunks(COFFLinkerContext &ctx, std::vector<Chunk *> &chunks) {
+void createEdataChunks(SymbolTable &symtab, std::vector<Chunk *> &chunks) {
   unsigned baseOrdinal = 1 << 16, maxOrdinal = 0;
-  for (Export &e : ctx.config.exports) {
+  for (Export &e : symtab.exports) {
     baseOrdinal = std::min(baseOrdinal, (unsigned)e.ordinal);
     maxOrdinal = std::max(maxOrdinal, (unsigned)e.ordinal);
   }
@@ -930,15 +930,16 @@ void createEdataChunks(COFFLinkerContext &ctx, std::vector<Chunk *> &chunks) {
   // https://learn.microsoft.com/en-us/cpp/build/reference/export-exports-a-function?view=msvc-170
   assert(baseOrdinal >= 1);
 
-  auto *dllName = make<StringChunk>(sys::path::filename(ctx.config.outputFile));
-  auto *addressTab = make<AddressTableChunk>(ctx, baseOrdinal, maxOrdinal);
+  auto *dllName =
+      make<StringChunk>(sys::path::filename(symtab.ctx.config.outputFile));
+  auto *addressTab = make<AddressTableChunk>(symtab, baseOrdinal, maxOrdinal);
   std::vector<Chunk *> names;
-  for (Export &e : ctx.config.exports)
+  for (Export &e : symtab.exports)
     if (!e.noname)
       names.push_back(make<StringChunk>(e.exportName));
 
   std::vector<Chunk *> forwards;
-  for (Export &e : ctx.config.exports) {
+  for (Export &e : symtab.exports) {
     if (e.forwardTo.empty())
       continue;
     e.forwardChunk = make<StringChunk>(e.forwardTo);
@@ -946,7 +947,8 @@ void createEdataChunks(COFFLinkerContext &ctx, std::vector<Chunk *> &chunks) {
   }
 
   auto *nameTab = make<NamePointersChunk>(names);
-  auto *ordinalTab = make<ExportOrdinalChunk>(ctx, baseOrdinal, names.size());
+  auto *ordinalTab =
+      make<ExportOrdinalChunk>(symtab, baseOrdinal, names.size());
   auto *dir =
       make<ExportDirectoryChunk>(baseOrdinal, maxOrdinal, names.size(), dllName,
                                  addressTab, nameTab, ordinalTab);
diff --git a/lld/COFF/DLL.h b/lld/COFF/DLL.h
index 901c974069b47..724a323d62d20 100644
--- a/lld/COFF/DLL.h
+++ b/lld/COFF/DLL.h
@@ -78,7 +78,7 @@ class DelayLoadContents {
 };
 
 // Create all chunks for the DLL export table.
-void createEdataChunks(COFFLinkerContext &ctx, std::vector<Chunk *> &chunks);
+void createEdataChunks(SymbolTable &symtab, std::vector<Chunk *> &chunks);
 
 } // namespace lld::coff
 
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index b257071c97086..4e0678282eed0 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -458,7 +458,7 @@ void LinkerDriver::parseDirectives(InputFile *file) {
     // declarations, many object files may end up with having the
     // same /EXPORT options. In order to save cost of parsing them,
     // we dedup them first.
-    if (!directivesExports.insert(e).second)
+    if (!file->symtab.directivesExports.insert(e).second)
       continue;
 
     Export exp = parseExport(e);
@@ -469,7 +469,7 @@ void LinkerDriver::parseDirectives(InputFile *file) {
         exp.extName = saver().save("_" + exp.extName);
     }
     exp.source = ExportSource::Directives;
-    ctx.config.exports.push_back(exp);
+    file->symtab.exports.push_back(exp);
   }
 
   // Handle /include: in bulk.
@@ -956,7 +956,7 @@ std::string LinkerDriver::getImportName(bool asLib) {
 void LinkerDriver::createImportLibrary(bool asLib) {
   llvm::TimeTraceScope timeScope("Create import library");
   std::vector<COFFShortExport> exports;
-  for (Export &e1 : ctx.config.exports) {
+  for (Export &e1 : ctx.symtab.exports) {
     COFFShortExport e2;
     e2.Name = std::string(e1.name);
     e2.SymbolName = std::string(e1.symbolName);
@@ -1069,7 +1069,7 @@ void LinkerDriver::parseModuleDefs(StringRef path) {
     e2.isPrivate = e1.Private;
     e2.constant = e1.Constant;
     e2.source = ExportSource::ModuleDefinition;
-    ctx.config.exports.push_back(e2);
+    ctx.symtab.exports.push_back(e2);
   }
 }
 
@@ -1222,8 +1222,10 @@ static void findKeepUniqueSections(COFFLinkerContext &ctx) {
 
   // Exported symbols could be address-significant in other executables or DSOs,
   // so we conservatively mark them as address-significant.
-  for (Export &r : ctx.config.exports)
-    markAddrsig(r.sym);
+  ctx.forEachSymtab([](SymbolTable &symtab) {
+    for (Export &r : symtab.exports)
+      markAddrsig(r.sym);
+  });
 
   // Visit the address-significance table in each object file and mark each
   // referenced symbol as address-significant.
@@ -1376,13 +1378,13 @@ void LinkerDriver::maybeCreateECExportThunk(StringRef name, Symbol *&sym) {
 void LinkerDriver::createECExportThunks() {
   // Check if EXP+ symbols have corresponding $hp_target symbols and use them
   // to create export thunks when available.
-  for (Symbol *s : ctx.symtab.expSymbols) {
+  for (Symbol *s : ctx.symtabEC->expSymbols) {
     if (!s->isUsedInRegularObj)
       continue;
     assert(s->getName().starts_with("EXP+"));
     std::string targetName =
         (s->getName().substr(strlen("EXP+")) + "$hp_target").str();
-    Symbol *sym = ctx.symtab.find(targetName);
+    Symbol *sym = ctx.symtabEC->find(targetName);
     if (!sym)
       continue;
     Defined *targetSym;
@@ -1407,7 +1409,7 @@ void LinkerDriver::createECExportThunks() {
   if (ctx.symtabEC->entry)
     maybeCreateECExportThunk(ctx.symtabEC->entry->getName(),
                              ctx.symtabEC->entry);
-  for (Export &e : ctx.config.exports) {
+  for (Export &e : ctx.symtabEC->exports) {
     if (!e.data)
       maybeCreateECExportThunk(e.extName.empty() ? e.name : e.extName, e.sym);
   }
@@ -1430,7 +1432,7 @@ void LinkerDriver::maybeExportMinGWSymbols(const opt::InputArgList &args) {
     if (!ctx.config.dll)
       return;
 
-    if (!ctx.config.exports.empty())
+    if (!ctx.symtab.exports.empty())
       return;
     if (args.hasArg(OPT_exclude_all_symbols))
       return;
@@ -1466,7 +1468,7 @@ void LinkerDriver::maybeExportMinGWSymbols(const opt::InputArgList &args) {
       if (!(c->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE))
         e.data = true;
     s->isUsedInRegularObj = true;
-    ctx.config.exports.push_back(e);
+    ctx.symtab.exports.push_back(e);
   });
 }
 
@@ -2343,7 +2345,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
         if (!e.extName.empty() && !isDecorated(e.extName))
           e.extName = saver().save("_" + e.extName);
       }
-      config->exports.push_back(e);
+      mainSymtab.exports.push_back(e);
     }
   }
 
@@ -2355,7 +2357,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
 
   // Handle generation of import library from a def file.
   if (!args.hasArg(OPT_INPUT, OPT_wholearchive_file)) {
-    fixupExports();
+    ctx.forEachSymtab([](SymbolTable &symtab) { symtab.fixupExports(); });
     if (!config->noimplib)
       createImportLibrary(/*asLib=*/true);
     return;
@@ -2541,16 +2543,16 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
         // search for its mangled names.
         if (symtab.entry)
           symtab.mangleMaybe(symtab.entry);
-      });
 
-      // Windows specific -- Make sure we resolve all dllexported symbols.
-      for (Export &e : config->exports) {
-        if (!e.forwardTo.empty())
-          continue;
-        e.sym = ctx.symtab.addGCRoot(e.name, !e.data);
-        if (e.source != ExportSource::Directives)
-          e.symbolName = ctx.symtab.mangleMaybe(e.sym);
-      }
+        // Windows specific -- Make sure we resolve all dllexported symbols.
+        for (Export &e : symtab.exports) {
+          if (!e.forwardTo.empty())
+            continue;
+          e.sym = symtab.addGCRoot(e.name, !e.data);
+          if (e.source != ExportSource::Directives)
+            e.symbolName = symtab.mangleMaybe(e.sym);
+        }
+      });
 
       // Add weak aliases. Weak aliases is a mechanism to give remaining
       // undefined symbols final chance to be resolved successfully.
@@ -2651,7 +2653,9 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
   if (errorCount())
     return;
 
-  config->hadExplicitExports = !config->exports.empty();
+  ctx.forEachSymtab([](SymbolTable &symtab) {
+    symtab.hadExplicitExports = !symtab.exports.empty();
+  });
   if (config->mingw) {
     // In MinGW, all symbols are automatically exported if no symbols
     // are chosen to be exported.
@@ -2716,17 +2720,18 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
   // Windows specific -- when we are creating a .dll file, we also
   // need to create a .lib file. In MinGW mode, we only do that when the
   // -implib option is given explicitly, for compatibility with GNU ld.
-  if (!config->exports.empty() || config->dll) {
+  if (!ctx.symtab.exports.empty() || config->dll) {
     llvm::TimeTraceScope timeScope("Create .lib exports");
-    fixupExports();
+    ctx.forEachSymtab([](SymbolTable &symtab) { symtab.fixupExports(); });
     if (!config->noimplib && (!config->mingw || !config->implib.empty()))
       createImportLibrary(/*asLib=*/false);
-    assignExportOrdinals();
+    ctx.forEachSymtab(
+        [](SymbolTable &symtab) { symtab.assignExportOrdinals(); });
   }
 
   // Handle /output-def (MinGW specific).
   if (auto *arg = args.getLastArg(OPT_output_def))
-    writeDefFile(ctx, arg->getValue(), config->exports);
+    writeDefFile(ctx, arg->getValue(), ctx.symtab.exports);
 
   // Set extra alignment for .comm symbols
   for (auto pair : config->alignComm) {
diff --git a/lld/COFF/Driver.h b/lld/COFF/Driver.h
index 5f65bd7f8d097..12724cbd1eef4 100644
--- a/lld/COFF/Driver.h
+++ b/lld/COFF/Driver.h
@@ -182,7 +182,6 @@ class LinkerDriver {
   std::list<std::function<void()>> taskQueue;
   std::vector<MemoryBufferRef> resources;
 
-  llvm::DenseSet<StringRef> directivesExports;
   llvm::DenseSet<StringRef> excludedSymbols;
 
   COFFLinkerContext &ctx;
@@ -249,8 +248,6 @@ class LinkerDriver {
 
   // Used for dllexported symbols.
   Export parseExport(StringRef arg);
-  void fixupExports();
-  void assignExportOrdinals();
 
   // Parses a string in the form of "key=value" and check
   // if value matches previous values for the key.
diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp
index 19abd4806d53f..4a70c826691d1 100644
--- a/lld/COFF/DriverUtils.cpp
+++ b/lld/COFF/DriverUtils.cpp
@@ -656,142 +656,6 @@ Export LinkerDriver::parseExport(StringRef arg) {
   llvm_unreachable("");
 }
 
-// Convert stdcall/fastcall style symbols into unsuffixed symbols,
-// with or without a leading underscore. (MinGW specific.)
-static StringRef killAt(StringRef sym, bool prefix) {
-  if (sym.empty())
-    return sym;
-  // Strip any trailing stdcall suffix
-  sym = sym.substr(0, sym.find('@', 1));
-  if (!sym.starts_with("@")) {
-    if (prefix && !sym.starts_with("_"))
-      return saver().save("_" + sym);
-    return sym;
-  }
-  // For fastcall, remove the leading @ and replace it with an
-  // underscore, if prefixes are used.
-  sym = sym.substr(1);
-  if (prefix)
-    sym = saver().save("_" + sym);
-  return sym;
-}
-
-static StringRef exportSourceName(ExportSource s) {
-  switch (s) {
-  case ExportSource::Directives:
-    return "source file (directives)";
-  case ExportSource::Export:
-    return "/export";
-  case ExportSource::ModuleDefinition:
-    return "/def";
-  default:
-    llvm_unreachable("unknown ExportSource");
-  }
-}
-
-// Performs error checking on all /export arguments.
-// It also sets ordinals.
-void LinkerDriver::fixupExports() {
-  llvm::TimeTraceScope timeScope("Fixup exports");
-  // Symbol ordinals must be unique.
-  std::set<uint16_t> ords;
-  for (Export &e : ctx.config.exports) {
-    if (e.ordinal == 0)
-      continue;
-    if (!ords.insert(e.ordinal).second)
-      Fatal(ctx) << "duplicate export ordinal: " << e.name;
-  }
-
-  for (Export &e : ctx.config.exports) {
-    if (!e.exportAs.empty()) {
-      e.exportName = e.exportAs;
-      continue;
-    }
-
-    StringRef sym =
-        !e.forwardTo.empty() || e.extName.empty() ? e.name : e.extName;
-    if (ctx.config.machine == I386 && sym.starts_with("_")) {
-      // In MSVC mode, a fully decorated stdcall function is exported
-      // as-is with the leading underscore (with type IMPORT_NAME).
-      // In MinGW mode, a decorated stdcall function gets the underscore
-      // removed, just like normal cdecl functions.
-      if (ctx.config.mingw || !sym.contains('@')) {
-        e.exportName = sym.substr(1);
-        continue;
-      }
-    }
-    if (isArm64EC(ctx.config.machine) && !e.data && !e.constant) {
-      if (std::optional<std::string> demangledName =
-              getArm64ECDemangledFunctionName(sym)) {
-        e.exportName = saver().save(*demangledName);
-        continue;
-      }
-    }
-    e.exportName = sym;
-  }
-
-  if (ctx.config.killAt && ctx.config.machine == I386) {
-    for (Export &e : ctx.config.exports) {
-      e.name = killAt(e.name, true);
-      e.exportName = killAt(e.exportName, false);
-      e.extName = killAt(e.extName, true);
-      e.symbolName = killAt(e.symbolName, true);
-    }
-  }
-
-  // Uniquefy by name.
-  DenseMap<StringRef, std::pair<Export *, unsigned>> map(
-      ctx.config.exports.size());
-  std::vector<Export> v;
-  for (Export &e : ctx.config.exports) {
-    auto pair = map.insert(std::make_pair(e.exportName, std::make_pair(&e, 0)));
-    bool inserted = pair.second;
-    if (inserted) {
-      pair.first->second.second = v.size();
-      v.push_back(e);
-      continue;
-    }
-    Export *existing = pair.first->second.first;
-    if (e == *existing || e.name != existing->name)
-      continue;
-    // If the existing export comes from .OBJ directives, we are allowed to
-    // overwrite it with /DEF: or /EXPORT without any warning, as MSVC link.exe
-    // does.
-    if (existing->source == ExportSource::Directives) {
-      *existing = e;
-      v[pair.first->second.second] = e;
-      continue;
-    }
-    if (existing->source == e.source) {
-      Warn(ctx) << "duplicate " << exportSourceName(existing->source)
-                << " option: " << e.name;
-    } else {
-      Warn(ctx) << "duplicate export: " << e.name << " first seen in "
-                << exportSourceName(existing->source) << ", now in "
-                << exportSourceName(e.source);
-    }
-  }
-  ctx.config.exports = std::move(v);
-
-  // Sort by name.
-  llvm::sort(ctx.config.exports, [](const Export &a, const Export &b) {
-    return a.exportName < b.exportName;
-  });
-}
-
-void LinkerDriver::assignExportOrdinals() {
-  // Assign unique ordinals if default (= 0).
-  uint32_t max = 0;
-  for (Export &e : ctx.config.exports)
-    max = std::max(max, (uint32_t)e.ordinal);
-  for (Export &e : ctx.config.exports)
-    if (e.ordinal == 0)
-      e.ordinal = ++max;
-  if (max > std::numeric_limits<uint16_t>::max())
-    Fatal(ctx) << "too many exported symbols (got " << max << ", max "
-               << Twine(std::numeric_limits<uint16_t>::max()) << ")";
-}
-
 // Parses a string in the form of "key=value" and check
 // if value matches previous values for the same key.
 void LinkerDriver::checkFailIfMismatch(StringRef arg, InputFile *source) {
diff --git a/lld/COFF/MapFile.cpp b/lld/COFF/MapFile.cpp
index af87587d143d5..eb98bb484f9f4 100644
--- a/lld/COFF/MapFile.cpp
+++ b/lld/COFF/MapFile.cpp
@@ -326,7 +326,7 @@ void lld::coff::writeMapFile(COFFLinkerContext &ctx) {
     os << " Exports\n";
     os << "\n";
     os << "  ordinal    name\n\n";
-    for (Export &e : ctx.config.exports) {
+    for (Export &e : ctx.symtab.exports) {
       os << format("  %7d", e.ordinal) << "    " << e.name << "\n";
       if (!e.extName.empty() && e.extName != e.name)
         os << "               exported name: " << e.extName << "\n";
diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp
index bf965e8a2332d..ecccc7d6ed70c 100644
--- a/lld/COFF/SymbolTable.cpp
+++ b/lld/COFF/SymbolTable.cpp
@@ -1118,6 +1118,141 @@ void SymbolTable::addUndefinedGlob(StringRef arg) {
     addGCRoot(sym->getName());
 }
 
+// Convert stdcall/fastcall style symbols into unsuffixed symbols,
+// with or without a leading underscore. (MinGW specific.)
+static StringRef killAt(StringRef sym, bool prefix) {
+  if (sym.empty())
+    return sym;
+  // Strip any trailing stdcall suffix
+  sym = sym.substr(0, sym.find('@', 1));
+  if (!sym.starts_with("@")) {
+    if (prefix && !sym.starts_with("_"))
+      return saver().save("_" + sym);
+    return sym;
+  }
+  // For fastcall, remove the leading @ and replace it with an
+  // underscore, if prefixes are used.
+  sym = sym.substr(1);
+  if (prefix)
+    sym = saver().save("_" + sym);
+  return sym;
+}
+
+static StringRef exportSourceName(ExportSource s) {
+  switch (s) {
+  case ExportSource::Directives:
+    return "source file (directives)";
+  case ExportSource::Export:
+    return "/export";
+  case ExportSource::ModuleDefinition:
+    return "/def";
+  default:
+    llvm_unreachable("unknown ExportSource");
+  }
+}
+
+// Validates explicit export ordinals, then computes each export's final name,
+// drops duplicates, and sorts by name. Ordinals are assigned separately.
+void SymbolTable::fixupExports() {
+  llvm::TimeTraceScope timeScope("Fixup exports");
+  // Explicitly specified ordinals must be unique; 0 means unassigned.
+  std::set<uint16_t> ords;
+  for (Export &e : exports) {
+    if (e.ordinal == 0)
+      continue;
+    if (!ords.insert(e.ordinal).second)
+      Fatal(ctx) << "duplicate export ordinal: " << e.name;
+  }
+
+  for (Export &e : exports) {
+    if (!e.exportAs.empty()) {
+      e.exportName = e.exportAs;
+      continue;
+    }
+
+    StringRef sym =
+        !e.forwardTo.empty() || e.extName.empty() ? e.name : e.extName;
+    if (machine == I386 && sym.starts_with("_")) {
+      // In MSVC mode, a fully decorated stdcall function is exported
+      // as-is with the leading underscore (with type IMPORT_NAME).
+      // In MinGW mode, a decorated stdcall function gets the underscore
+      // removed, just like normal cdecl functions.
+      if (ctx.config.mingw || !sym.contains('@')) {
+        e.exportName = sym.substr(1);
+        continue;
+      }
+    }
+    if (isEC() && !e.data && !e.constant) {
+      if (std::optional<std::string> demangledName =
+              getArm64ECDemangledFunctionName(sym)) {
+        e.exportName = saver().save(*demangledName);
+        continue;
+      }
+    }
+    e.exportName = sym;
+  }
+
+  if (ctx.config.killAt && machine == I386) {
+    for (Export &e : exports) {
+      e.name = killAt(e.name, true);
+      e.exportName = killAt(e.exportName, false);
+      e.extName = killAt(e.extName, true);
+      e.symbolName = killAt(e.symbolName, true);
+    }
+  }
+
+  // Uniquify by export name; the first entry seen for a name is kept.
+  DenseMap<StringRef, std::pair<Export *, unsigned>> map(exports.size());
+  std::vector<Export> v;
+  for (Export &e : exports) {
+    auto pair = map.insert(std::make_pair(e.exportName, std::make_pair(&e, 0)));
+    bool inserted = pair.second;
+    if (inserted) {
+      pair.first->second.second = v.size();
+      v.push_back(e);
+      continue;
+    }
+    Export *existing = pair.first->second.first;
+    if (e == *existing || e.name != existing->name)
+      continue;
+    // If the existing export comes from .OBJ directives, we are allowed to
+    // overwrite it with /DEF: or /EXPORT without any warning, as MSVC link.exe
+    // does.
+    if (existing->source == ExportSource::Directives) {
+      *existing = e;
+      v[pair.first->second.second] = e;
+      continue;
+    }
+    if (existing->source == e.source) {
+      Warn(ctx) << "duplicate " << exportSourceName(existing->source)
+                << " option: " << e.name;
+    } else {
+      Warn(ctx) << "duplicate export: " << e.name << " first seen in "
+                << exportSourceName(existing->source) << ", now in "
+                << exportSourceName(e.source);
+    }
+  }
+  exports = std::move(v);
+
+  // Sort by export name.
+  llvm::sort(exports, [](const Export &a, const Export &b) {
+    return a.exportName < b.exportName;
+  });
+}
+
+void SymbolTable::assignExportOrdinals() {
+  // Assign unique ordinals if default (= 0).
+  uint32_t max = 0;
+  for (Export &e : exports)
+    max = std::max(max, (uint32_t)e.ordinal);
+  for (Export &e : exports)
+    if (e.ordinal == 0)
+      e.ordinal = ++max;
+  if (max > std::numeric_limits<uint16_t>::max())
+    Fatal(ctx) << "too many exported symbols (got " << max << ", max "
+               << Twine(std::numeric_limits<uint16_t>::max()) << ")";
+}
+
 Symbol *SymbolTable::addUndefined(StringRef name) {
   return addUndefined(name, nullptr, false);
 }
diff --git a/lld/COFF/SymbolTable.h b/lld/COFF/SymbolTable.h
index 66bca0d63e5ff..e5b02ce5904c4 100644
--- a/lld/COFF/SymbolTable.h
+++ b/lld/COFF/SymbolTable.h
@@ -150,6 +150,17 @@ class SymbolTable {
   // A list of EC EXP+ symbols.
   std::vector<Symbol *> expSymbols;
 
+  // A list of DLL exports.
+  std::vector<Export> exports;
+  llvm::DenseSet<StringRef> directivesExports;
+  bool hadExplicitExports = false; // Assigned by the driver before writing.
+
+  Chunk *edataStart = nullptr;
+  Chunk *edataEnd = nullptr;
+
+  void fixupExports();
+  void assignExportOrdinals();
+
   // Iterates symbols in non-determinstic hash table order.
   template <typename T> void forEachSymbol(T callback) {
     for (auto &pair : symMap)
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 90b2c083cbfb9..bef2ced9f2957 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -288,8 +288,6 @@ class Writer {
   IdataContents idata;
   Chunk *importTableStart = nullptr;
   uint64_t importTableSize = 0;
-  Chunk *edataStart = nullptr;
-  Chunk *edataEnd = nullptr;
   Chunk *iatStart = nullptr;
   uint64_t iatSize = 0;
   DelayLoadContents delayIdata;
@@ -1331,22 +1329,46 @@ void Writer::createExportTable() {
   if (!edataSec->chunks.empty()) {
     // Allow using a custom built export table from input object files, instead
     // of having the linker synthesize the tables.
-    if (ctx.config.hadExplicitExports)
-      Warn(ctx) << "literal .edata sections override exports";
-  } else if (!ctx.config.exports.empty()) {
-    std::vector<Chunk *> edataChunks;
-    createEdataChunks(ctx, edataChunks);
-    for (Chunk *c : edataChunks)
-      edataSec->addChunk(c);
-  }
-  if (!edataSec->chunks.empty()) {
-    edataStart = edataSec->chunks.front();
-    edataEnd = edataSec->chunks.back();
+    if (!ctx.hybridSymtab) {
+      ctx.symtab.edataStart = edataSec->chunks.front();
+      ctx.symtab.edataEnd = edataSec->chunks.back();
+    } else {
+      // On hybrid target, split EC and native chunks.
+      llvm::stable_sort(edataSec->chunks, [=](const Chunk *a, const Chunk *b) {
+        return (a->getMachine() != ARM64) < (b->getMachine() != ARM64);
+      });
+
+      for (auto chunk : edataSec->chunks) {
+        if (chunk->getMachine() != ARM64) {
+          ctx.hybridSymtab->edataStart = chunk;
+          ctx.hybridSymtab->edataEnd = edataSec->chunks.back();
+          break;
+        }
+
+        if (!ctx.symtab.edataStart)
+          ctx.symtab.edataStart = chunk;
+        ctx.symtab.edataEnd = chunk;
+      }
+    }
   }
-  // Warn on exported deleting destructor.
-  for (auto e : ctx.config.exports)
-    if (e.sym && e.sym->getName().starts_with("??_G"))
-      Warn(ctx) << "export of deleting dtor: " << e.sym;
+  ctx.forEachSymtab([&](SymbolTable &symtab) {
+    if (symtab.edataStart) {
+      if (symtab.hadExplicitExports)
+        Warn(ctx) << "literal .edata sections override exports";
+    } else if (!symtab.exports.empty()) {
+      std::vector<Chunk *> edataChunks;
+      createEdataChunks(symtab, edataChunks);
+      for (Chunk *c : edataChunks)
+        edataSec->addChunk(c);
+      symtab.edataStart = edataChunks.front();
+      symtab.edataEnd = edataChunks.back();
+    }
+
+    // Warn on exported deleting destructor.
+    for (auto e : symtab.exports)
+      if (e.sym && e.sym->getName().starts_with("??_G"))
+        Warn(ctx) << "export of deleting dtor: " << toString(ctx, *e.sym);
+  });
 }
 
 void Writer::removeUnusedSections() {
@@ -1819,10 +1841,11 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
          dataDirOffset64 == buf - buffer->getBufferStart());
   auto *dir = reinterpret_cast<data_directory *>(buf);
   buf += sizeof(*dir) * numberOfDataDirectory;
-  if (edataStart) {
-    dir[EXPORT_TABLE].RelativeVirtualAddress = edataStart->getRVA();
-    dir[EXPORT_TABLE].Size =
-        edataEnd->getRVA() + edataEnd->getSize() - edataStart->getRVA();
+  if (ctx.symtab.edataStart) {
+    dir[EXPORT_TABLE].RelativeVirtualAddress = ctx.symtab.edataStart->getRVA();
+    dir[EXPORT_TABLE].Size = ctx.symtab.edataEnd->getRVA() +
+                             ctx.symtab.edataEnd->getSize() -
+                             ctx.symtab.edataStart->getRVA();
   }
   if (importTableStart) {
     dir[IMPORT_TABLE].RelativeVirtualAddress = importTableStart->getRVA();
@@ -2061,11 +2084,11 @@ void Writer::createGuardCFTables() {
   ctx.forEachSymtab([&](SymbolTable &symtab) {
     if (symtab.entry)
       maybeAddAddressTakenFunction(addressTakenSyms, symtab.entry);
-  });
 
-  // Mark exported symbols in executable sections as address-taken.
-  for (Export &e : config->exports)
-    maybeAddAddressTakenFunction(addressTakenSyms, e.sym);
+    // Mark exported symbols in executable sections as address-taken.
+    for (Export &e : symtab.exports)
+      maybeAddAddressTakenFunction(addressTakenSyms, e.sym);
+  });
 
   // For each entry in the .giats table, check if it has a corresponding load
   // thunk (e.g. because the DLL that defines it will be delay-loaded) and, if
@@ -2392,6 +2415,19 @@ void Writer::setECSymbols() {
       symtab->findUnderscore("__arm64x_native_entrypoint")
           ->replaceKeepingName(altEntrySym, sizeof(SymbolUnion));
     }
+
+    if (symtab->edataStart)
+      ctx.dynamicRelocs->set(
+          dataDirOffset64 + EXPORT_TABLE * sizeof(data_directory) +
+              offsetof(data_directory, Size),
+          symtab->edataEnd->getRVA() - symtab->edataStart->getRVA() +
+              symtab->edataEnd->getSize());
+    if (hybridPdata.first)
+      ctx.dynamicRelocs->set(
+          dataDirOffset64 + EXCEPTION_TABLE * sizeof(data_directory) +
+              offsetof(data_directory, Size),
+          hybridPdata.last->getRVA() - hybridPdata.first->getRVA() +
+              hybridPdata.last->getSize());
   }
 }
 
@@ -2644,6 +2680,32 @@ void Writer::createDynamicRelocs() {
       Warn(ctx) << "'__chpe_metadata' is missing for ARM64X target";
   }
 
+  if (ctx.symtab.edataStart != ctx.hybridSymtab->edataStart) {
+    ctx.dynamicRelocs->add(IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE, sizeof(uint32_t),
+                           dataDirOffset64 +
+                               EXPORT_TABLE * sizeof(data_directory) +
+                               offsetof(data_directory, RelativeVirtualAddress),
+                           ctx.hybridSymtab->edataStart);
+    // The Size value is assigned after addresses are finalized.
+    ctx.dynamicRelocs->add(IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE, sizeof(uint32_t),
+                           dataDirOffset64 +
+                               EXPORT_TABLE * sizeof(data_directory) +
+                               offsetof(data_directory, Size));
+  }
+
+  if (pdata.first != hybridPdata.first) {
+    ctx.dynamicRelocs->add(IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE, sizeof(uint32_t),
+                           dataDirOffset64 +
+                               EXCEPTION_TABLE * sizeof(data_directory) +
+                               offsetof(data_directory, RelativeVirtualAddress),
+                           hybridPdata.first);
+    // The Size value is assigned after addresses are finalized.
+    ctx.dynamicRelocs->add(IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE, sizeof(uint32_t),
+                           dataDirOffset64 +
+                               EXCEPTION_TABLE * sizeof(data_directory) +
+                               offsetof(data_directory, Size));
+  }
+
   // Set the hybrid load config to the EC load config.
   ctx.dynamicRelocs->add(IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE, sizeof(uint32_t),
                          dataDirOffset64 +
diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp
index b63551d0f682e..9538dd4a70bae 100644
--- a/lld/ELF/Arch/AArch64.cpp
+++ b/lld/ELF/Arch/AArch64.cpp
@@ -157,9 +157,14 @@ RelExpr AArch64::getRelExpr(RelType type, const Symbol &s,
     return RE_AARCH64_AUTH;
   case R_AARCH64_TLSDESC_ADR_PAGE21:
     return RE_AARCH64_TLSDESC_PAGE;
+  case R_AARCH64_AUTH_TLSDESC_ADR_PAGE21:
+    return RE_AARCH64_AUTH_TLSDESC_PAGE;
   case R_AARCH64_TLSDESC_LD64_LO12:
   case R_AARCH64_TLSDESC_ADD_LO12:
     return R_TLSDESC;
+  case R_AARCH64_AUTH_TLSDESC_LD64_LO12:
+  case R_AARCH64_AUTH_TLSDESC_ADD_LO12:
+    return RE_AARCH64_AUTH_TLSDESC;
   case R_AARCH64_TLSDESC_CALL:
     return R_TLSDESC_CALL;
   case R_AARCH64_TLSLE_ADD_TPREL_HI12:
@@ -545,6 +550,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel,
   case R_AARCH64_ADR_PREL_PG_HI21:
   case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
   case R_AARCH64_TLSDESC_ADR_PAGE21:
+  case R_AARCH64_AUTH_TLSDESC_ADR_PAGE21:
     checkInt(ctx, loc, val, 33, rel);
     [[fallthrough]];
   case R_AARCH64_ADR_PREL_PG_HI21_NC:
@@ -597,6 +603,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel,
   case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
   case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC:
   case R_AARCH64_TLSDESC_LD64_LO12:
+  case R_AARCH64_AUTH_TLSDESC_LD64_LO12:
     checkAlignment(ctx, loc, val, 8, rel);
     write32Imm12(loc, getBits(val, 3, 11));
     break;
@@ -671,6 +678,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel,
     break;
   case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
   case R_AARCH64_TLSDESC_ADD_LO12:
+  case R_AARCH64_AUTH_TLSDESC_ADD_LO12:
     write32Imm12(loc, val);
     break;
   case R_AARCH64_TLSDESC:
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index efa7ba3e7cb06..42ef530b79d89 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -966,12 +966,14 @@ uint64_t InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r,
   case R_SIZE:
     return r.sym->getSize() + a;
   case R_TLSDESC:
+  case RE_AARCH64_AUTH_TLSDESC:
     return ctx.in.got->getTlsDescAddr(*r.sym) + a;
   case R_TLSDESC_PC:
     return ctx.in.got->getTlsDescAddr(*r.sym) + a - p;
   case R_TLSDESC_GOTPLT:
     return ctx.in.got->getTlsDescAddr(*r.sym) + a - ctx.in.gotPlt->getVA();
   case RE_AARCH64_TLSDESC_PAGE:
+  case RE_AARCH64_AUTH_TLSDESC_PAGE:
     return getAArch64Page(ctx.in.got->getTlsDescAddr(*r.sym) + a) -
            getAArch64Page(p);
   case RE_LOONGARCH_TLSDESC_PAGE_PC:
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index 77b90172bc1c2..76b151b93d517 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -1292,6 +1292,27 @@ static unsigned handleMipsTlsRelocation(Ctx &ctx, RelType type, Symbol &sym,
   return 0;
 }
 
+static unsigned handleAArch64PAuthTlsRelocation(InputSectionBase *sec,
+                                                RelExpr expr, RelType type,
+                                                uint64_t offset, Symbol &sym,
+                                                int64_t addend) {
+  // Do not optimize signed TLSDESC to LE/IE (as described in pauthabielf64).
+  // https://github.com/ARM-software/abi-aa/blob/main/pauthabielf64/pauthabielf64.rst#general-restrictions
+  // > PAUTHELF64 only supports the descriptor based TLS (TLSDESC).
+  if (oneof<RE_AARCH64_AUTH_TLSDESC_PAGE, RE_AARCH64_AUTH_TLSDESC>(expr)) {
+    sym.setFlags(NEEDS_TLSDESC | NEEDS_TLSDESC_AUTH);
+    sec->addReloc({expr, type, offset, addend, &sym});
+    return 1;
+  }
+
+  // TLSDESC_CALL hint relocation should not be emitted by compiler with signed
+  // TLSDESC enabled.
+  if (expr == R_TLSDESC_CALL)
+    sym.setFlags(NEEDS_TLSDESC_NONAUTH);
+
+  return 0;
+}
+
 // Notes about General Dynamic and Local Dynamic TLS models below. They may
 // require the generation of a pair of GOT entries that have associated dynamic
 // relocations. The pair of GOT entries created are of the form GOT[e0] Module
@@ -1302,6 +1323,13 @@ static unsigned handleMipsTlsRelocation(Ctx &ctx, RelType type, Symbol &sym,
 unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type,
                                                 uint64_t offset, Symbol &sym,
                                                 int64_t addend) {
+  bool isAArch64 = ctx.arg.emachine == EM_AARCH64;
+
+  if (isAArch64)
+    if (unsigned processed = handleAArch64PAuthTlsRelocation(
+            sec, expr, type, offset, sym, addend))
+      return processed;
+
   if (expr == R_TPREL || expr == R_TPREL_NEG) {
     if (ctx.arg.shared) {
       auto diag = Err(ctx);
@@ -1336,7 +1364,9 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type,
     // R_RISCV_TLSDESC_{LOAD_LO12,ADD_LO12_I,CALL} reference a label. Do not
     // set NEEDS_TLSDESC on the label.
     if (expr != R_TLSDESC_CALL) {
-      if (!isRISCV || type == R_RISCV_TLSDESC_HI20)
+      if (isAArch64)
+        sym.setFlags(NEEDS_TLSDESC | NEEDS_TLSDESC_NONAUTH);
+      else if (!isRISCV || type == R_RISCV_TLSDESC_HI20)
         sym.setFlags(NEEDS_TLSDESC);
       sec->addReloc({expr, type, offset, addend, &sym});
     }
@@ -1847,10 +1877,21 @@ void elf::postScanRelocations(Ctx &ctx) {
     GotSection *got = ctx.in.got.get();
 
     if (flags & NEEDS_TLSDESC) {
+      if ((flags & NEEDS_TLSDESC_AUTH) && (flags & NEEDS_TLSDESC_NONAUTH)) {
+        Err(ctx)
+            << "both AUTH and non-AUTH TLSDESC entries for '" << sym.getName()
+            << "' requested, but only one type of TLSDESC entry per symbol is "
+               "supported";
+        return;
+      }
       got->addTlsDescEntry(sym);
+      RelType tlsDescRel = ctx.target->tlsDescRel;
+      if (flags & NEEDS_TLSDESC_AUTH) {
+        got->addTlsDescAuthEntry();
+        tlsDescRel = ELF::R_AARCH64_AUTH_TLSDESC;
+      }
       ctx.mainPart->relaDyn->addAddendOnlyRelocIfNonPreemptible(
-          ctx.target->tlsDescRel, *got, got->getTlsDescOffset(sym), sym,
-          ctx.target->tlsDescRel);
+          tlsDescRel, *got, got->getTlsDescOffset(sym), sym, tlsDescRel);
     }
     if (flags & NEEDS_TLSGD) {
       got->addDynTlsEntry(sym);
diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h
index fde25a230b72e..d2a77bc953109 100644
--- a/lld/ELF/Relocations.h
+++ b/lld/ELF/Relocations.h
@@ -99,6 +99,8 @@ enum RelExpr {
   RE_AARCH64_PAGE_PC,
   RE_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC,
   RE_AARCH64_TLSDESC_PAGE,
+  RE_AARCH64_AUTH_TLSDESC_PAGE,
+  RE_AARCH64_AUTH_TLSDESC,
   RE_AARCH64_AUTH,
   RE_ARM_PCA,
   RE_ARM_SBREL,
diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp
index f53515cc3f3c0..06a22613ee93a 100644
--- a/lld/ELF/ScriptParser.cpp
+++ b/lld/ELF/ScriptParser.cpp
@@ -1489,6 +1489,8 @@ Expr ScriptParser::readPrimary() {
     Expr e = readPrimary();
     return [=] { return -e().getValue(); };
   }
+  if (consume("+"))
+    return readPrimary();
 
   StringRef tok = next();
   std::string location = getCurrentLocation();
diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h
index a59faf1037cb2..48df6f60db864 100644
--- a/lld/ELF/Symbols.h
+++ b/lld/ELF/Symbols.h
@@ -53,6 +53,8 @@ enum {
   NEEDS_TLSIE = 1 << 8,
   NEEDS_GOT_AUTH = 1 << 9,
   NEEDS_GOT_NONAUTH = 1 << 10,
+  NEEDS_TLSDESC_AUTH = 1 << 11,
+  NEEDS_TLSDESC_NONAUTH = 1 << 12,
 };
 
 // The base class for real symbol classes.
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index 10cbfe19b3b0a..eb07d82fc9601 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -681,6 +681,11 @@ bool GotSection::addTlsDescEntry(const Symbol &sym) {
   return true;
 }
 
+void GotSection::addTlsDescAuthEntry() {
+  authEntries.push_back({(numEntries - 2) * ctx.arg.wordsize, true});
+  authEntries.push_back({(numEntries - 1) * ctx.arg.wordsize, false});
+}
+
 bool GotSection::addDynTlsEntry(const Symbol &sym) {
   assert(sym.auxIdx == ctx.symAux.size() - 1);
   ctx.symAux.back().tlsGdIdx = numEntries;
diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
index 9fcee3b481af0..c977562f0b174 100644
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -114,6 +114,7 @@ class GotSection final : public SyntheticSection {
   void addEntry(const Symbol &sym);
   void addAuthEntry(const Symbol &sym);
   bool addTlsDescEntry(const Symbol &sym);
+  void addTlsDescAuthEntry();
   bool addDynTlsEntry(const Symbol &sym);
   bool addTlsIndex();
   uint32_t getTlsDescOffset(const Symbol &sym) const;
diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp
index bf458e392be8f..2f3ca13b832a1 100644
--- a/lld/MachO/Arch/ARM64.cpp
+++ b/lld/MachO/Arch/ARM64.cpp
@@ -205,7 +205,7 @@ InputSection *ARM64::getThunkBranchTarget(InputSection *thunk) const {
   assert(isa<InputSection *>(reloc.referent) &&
          "ARM64 thunk reloc is expected to point to an InputSection");
 
-  return reloc.referent.dyn_cast<InputSection *>();
+  return cast<InputSection *>(reloc.referent);
 }
 
 uint32_t ARM64::getICFSafeThunkSize() const { return sizeof(icfSafeThunkCode); }
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 417b7cf93efa7..97164e5992b8c 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -1958,7 +1958,7 @@ void InitOffsetsSection::writeTo(uint8_t *buf) const {
   // FIXME: Add function specified by -init when that argument is implemented.
   for (ConcatInputSection *isec : sections) {
     for (const Reloc &rel : isec->relocs) {
-      const Symbol *referent = rel.referent.dyn_cast<Symbol *>();
+      const Symbol *referent = cast<Symbol *>(rel.referent);
       assert(referent && "section relocation should have been rejected");
       uint64_t offset = referent->getVA() - in.header->addr;
       // FIXME: Can we handle this gracefully?
diff --git a/lld/test/COFF/arm64x-export.test b/lld/test/COFF/arm64x-export.test
new file mode 100644
index 0000000000000..526be63397358
--- /dev/null
+++ b/lld/test/COFF/arm64x-export.test
@@ -0,0 +1,229 @@
+REQUIRES: aarch64, x86
+RUN: split-file %s %t.dir && cd %t.dir
+
+RUN: llvm-mc -filetype=obj -triple=arm64ec-windows arm64ec-func.s -o arm64ec-func.obj
+RUN: llvm-mc -filetype=obj -triple=aarch64-windows arm64-func.s -o arm64-func.obj
+RUN: llvm-mc -filetype=obj -triple=arm64ec-windows func-drectve.s -o arm64ec-drectve.obj
+RUN: llvm-mc -filetype=obj -triple=aarch64-windows func-drectve.s -o arm64-drectve.obj
+RUN: llvm-mc -filetype=obj -triple=aarch64-windows edata.s -o arm64-edata.obj
+RUN: llvm-mc -filetype=obj -triple=arm64ec-windows edata.s -o arm64ec-edata.obj
+RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64ec.obj
+RUN: llvm-mc -filetype=obj -triple=aarch64-windows %S/Inputs/loadconfig-arm64.s -o loadconfig-arm64.obj
+
+
+# A command-line export applies only to EC exports.
+
+RUN: lld-link -machine:arm64x -dll -out:out-cmd.dll arm64ec-func.obj arm64-func.obj \
+RUN:          loadconfig-arm64.obj loadconfig-arm64ec.obj -noentry -export:func
+
+RUN: llvm-objdump -d out-cmd.dll | FileCheck --check-prefix=DISASM-EC %s
+DISASM-EC:      Disassembly of section .text:
+DISASM-EC-EMPTY:
+DISASM-EC-NEXT: 0000000180001000 <.text>:
+DISASM-EC-NEXT: 180001000: 52800040     mov     w0, #0x2                // =2
+DISASM-EC-NEXT: 180001004: d65f03c0     ret
+DISASM-EC-EMPTY:
+DISASM-EC-NEXT: Disassembly of section .hexpthk:
+DISASM-EC-EMPTY:
+DISASM-EC-NEXT: 0000000180002000 <.hexpthk>:
+DISASM-EC-NEXT: 180002000: 48 8b c4                     movq    %rsp, %rax
+DISASM-EC-NEXT: 180002003: 48 89 58 20                  movq    %rbx, 0x20(%rax)
+DISASM-EC-NEXT: 180002007: 55                           pushq   %rbp
+DISASM-EC-NEXT: 180002008: 5d                           popq    %rbp
+DISASM-EC-NEXT: 180002009: e9 f2 ef ff ff               jmp     0x180001000 <.text>
+DISASM-EC-NEXT: 18000200e: cc                           int3
+DISASM-EC-NEXT: 18000200f: cc                           int3
+
+RUN: llvm-readobj --headers --coff-exports out-cmd.dll | FileCheck --check-prefix=EXPORTS-EC %s
+EXPORTS-EC:      ExportTableRVA: 0x0
+EXPORTS-EC-NEXT: ExportTableSize: 0x0
+EXPORTS-EC-NOT:  Name: func
+EXPORTS-EC:      HybridObject {
+EXPORTS-EC:        ExportTableRVA: 0x3{{.*}}
+EXPORTS-EC-NEXT:   ExportTableSize: 0x4{{.*}}
+EXPORTS-EC:        Export {
+EXPORTS-EC-NEXT:     Ordinal: 1
+EXPORTS-EC-NEXT:     Name: func
+EXPORTS-EC-NEXT:     RVA: 0x2000
+EXPORTS-EC-NEXT:   }
+EXPORTS-EC-NEXT: }
+
+# Export using the EC .drectve section.
+
+RUN: lld-link -machine:arm64x -dll -out:out-drectve-ec.dll arm64ec-func.obj arm64-func.obj \
+RUN:          loadconfig-arm64.obj loadconfig-arm64ec.obj arm64ec-drectve.obj -noentry
+RUN: llvm-objdump -d out-drectve-ec.dll | FileCheck --check-prefix=DISASM-EC %s
+RUN: llvm-readobj --headers --coff-exports out-drectve-ec.dll | FileCheck --check-prefix=EXPORTS-EC %s
+
+# Export using the EC .edata section.
+
+RUN: lld-link -machine:arm64x -dll -out:out-edata-ec.dll arm64ec-func.obj arm64-func.obj \
+RUN:          loadconfig-arm64.obj loadconfig-arm64ec.obj arm64ec-edata.obj -noentry
+
+RUN: llvm-objdump -d out-edata-ec.dll | FileCheck --check-prefix=DISASM-EDATA-EC %s
+DISASM-EDATA-EC:      0000000180001000 <.text>:
+DISASM-EDATA-EC-NEXT: 180001000: 52800040     mov     w0, #0x2                // =2
+DISASM-EDATA-EC-NEXT: 180001004: d65f03c0     ret
+
+RUN: llvm-readobj --headers --coff-exports out-edata-ec.dll | FileCheck --check-prefix=EXPORTS-EDATA-EC %s
+EXPORTS-EDATA-EC:      ExportTableRVA: 0x0
+EXPORTS-EDATA-EC-NEXT: ExportTableSize: 0x0
+EXPORTS-EDATA-EC-NOT:  Name: func
+EXPORTS-EDATA-EC:      HybridObject {
+EXPORTS-EDATA-EC:        ExportTableRVA: 0x2{{.*}}
+EXPORTS-EDATA-EC-NEXT:   ExportTableSize: 0x4{{.*}}
+EXPORTS-EDATA-EC:        Export {
+EXPORTS-EDATA-EC-NEXT:     Ordinal: 1
+EXPORTS-EDATA-EC-NEXT:     Name: func
+EXPORTS-EDATA-EC-NEXT:     RVA: 0x1000
+EXPORTS-EDATA-EC-NEXT:   }
+EXPORTS-EDATA-EC-NEXT: }
+
+# Export using the native .drectve section.
+
+RUN: lld-link -machine:arm64x -dll -out:out-drectve-native.dll arm64ec-func.obj arm64-func.obj \
+RUN:          loadconfig-arm64.obj loadconfig-arm64ec.obj arm64-drectve.obj -noentry
+
+RUN: llvm-objdump -d out-drectve-native.dll | FileCheck --check-prefix=DISASM-NATIVE %s
+DISASM-NATIVE:      Disassembly of section .text:
+DISASM-NATIVE-EMPTY:
+DISASM-NATIVE-NEXT: 0000000180001000 <func>:
+DISASM-NATIVE-NEXT: 180001000: 52800020     mov     w0, #0x1                // =1
+DISASM-NATIVE-NEXT: 180001004: d65f03c0     ret
+
+RUN: llvm-readobj --headers --coff-exports out-drectve-native.dll | FileCheck --check-prefix=EXPORTS-NATIVE %s
+EXPORTS-NATIVE:      ExportTableRVA: 0x2{{.*}}
+EXPORTS-NATIVE-NEXT: ExportTableSize: 0x4{{.*}}
+EXPORTS-NATIVE:      Export {
+EXPORTS-NATIVE-NEXT:   Ordinal: 1
+EXPORTS-NATIVE-NEXT:   Name: func
+EXPORTS-NATIVE-NEXT:   RVA: 0x1000
+EXPORTS-NATIVE-NEXT: }
+EXPORTS-NATIVE:      HybridObject {
+EXPORTS-NATIVE:        ExportTableRVA: 0x0
+EXPORTS-NATIVE-NEXT:   ExportTableSize: 0x0
+EXPORTS-NATIVE-NOT:    Name: func
+
+# Export using the native .edata section.
+
+RUN: lld-link -machine:arm64x -dll -out:out-edata.dll arm64ec-func.obj arm64-func.obj \
+RUN:          loadconfig-arm64.obj loadconfig-arm64ec.obj arm64-edata.obj -noentry
+RUN: llvm-objdump -d out-edata.dll | FileCheck --check-prefix=DISASM-NATIVE %s
+RUN: llvm-readobj --headers --coff-exports out-edata.dll | FileCheck --check-prefix=EXPORTS-NATIVE %s
+
+# Export using both the native and EC .drectve sections.
+
+RUN: lld-link -machine:arm64x -dll -out:out-both.dll arm64ec-func.obj arm64-func.obj \
+RUN:          loadconfig-arm64.obj loadconfig-arm64ec.obj arm64-drectve.obj arm64ec-drectve.obj -noentry
+
+RUN: llvm-objdump -d out-both.dll | FileCheck --check-prefix=DISASM-BOTH %s
+DISASM-BOTH:      Disassembly of section .text:
+DISASM-BOTH-EMPTY:
+DISASM-BOTH-NEXT: 0000000180001000 <func>:
+DISASM-BOTH-NEXT: 180001000: 52800020     mov     w0, #0x1                // =1
+DISASM-BOTH-NEXT: 180001004: d65f03c0     ret
+DISASM-BOTH-NEXT:                 ...
+DISASM-BOTH-NEXT: 180002000: 52800040     mov     w0, #0x2                // =2
+DISASM-BOTH-NEXT: 180002004: d65f03c0     ret
+DISASM-BOTH-EMPTY:
+DISASM-BOTH-NEXT: Disassembly of section .hexpthk:
+DISASM-BOTH-EMPTY:
+DISASM-BOTH-NEXT: 0000000180003000 <.hexpthk>:
+DISASM-BOTH-NEXT: 180003000: 48 8b c4                     movq    %rsp, %rax
+DISASM-BOTH-NEXT: 180003003: 48 89 58 20                  movq    %rbx, 0x20(%rax)
+DISASM-BOTH-NEXT: 180003007: 55                           pushq   %rbp
+DISASM-BOTH-NEXT: 180003008: 5d                           popq    %rbp
+DISASM-BOTH-NEXT: 180003009: e9 f2 ef ff ff               jmp     0x180002000 <func+0x1000>
+DISASM-BOTH-NEXT: 18000300e: cc                           int3
+DISASM-BOTH-NEXT: 18000300f: cc                           int3
+
+RUN: llvm-readobj --headers --coff-exports out-both.dll | FileCheck --check-prefix=EXPORTS-BOTH %s
+EXPORTS-BOTH:      ExportTableRVA: 0x4{{.*}}
+EXPORTS-BOTH-NEXT: ExportTableSize: 0x4{{.*}}
+EXPORTS-BOTH:      Export {
+EXPORTS-BOTH-NEXT:   Ordinal: 1
+EXPORTS-BOTH-NEXT:   Name: func
+EXPORTS-BOTH-NEXT:   RVA: 0x1000
+EXPORTS-BOTH-NEXT: }
+EXPORTS-BOTH:      HybridObject {
+EXPORTS-BOTH:        ExportTableRVA: 0x4{{.*}}
+EXPORTS-BOTH-NEXT:   ExportTableSize: 0x4{{.*}}
+EXPORTS-BOTH:        Export {
+EXPORTS-BOTH-NEXT:     Ordinal: 1
+EXPORTS-BOTH-NEXT:     Name: func
+EXPORTS-BOTH-NEXT:     RVA: 0x3000
+EXPORTS-BOTH-NEXT:   }
+EXPORTS-BOTH-NEXT: }
+
+# Export using both the native and EC .edata sections.
+
+RUN: lld-link -machine:arm64x -dll -out:out-edata-both.dll arm64ec-func.obj arm64-func.obj \
+RUN:          loadconfig-arm64.obj loadconfig-arm64ec.obj arm64-edata.obj arm64ec-edata.obj -noentry
+RUN: llvm-readobj --headers --coff-exports out-edata-both.dll | FileCheck --check-prefix=EXPORTS-EDATA-BOTH %s
+EXPORTS-EDATA-BOTH:      ExportTableRVA: 0x3{{.*}}
+EXPORTS-EDATA-BOTH-NEXT: ExportTableSize: 0x4{{.*}}
+EXPORTS-EDATA-BOTH:      Export {
+EXPORTS-EDATA-BOTH-NEXT:   Ordinal: 1
+EXPORTS-EDATA-BOTH-NEXT:   Name: func
+EXPORTS-EDATA-BOTH-NEXT:   RVA: 0x1000
+EXPORTS-EDATA-BOTH-NEXT: }
+EXPORTS-EDATA-BOTH:      HybridObject {
+EXPORTS-EDATA-BOTH:        ExportTableRVA: 0x3{{.*}}
+EXPORTS-EDATA-BOTH-NEXT:   ExportTableSize: 0x4{{.*}}
+EXPORTS-EDATA-BOTH:        Export {
+EXPORTS-EDATA-BOTH-NEXT:     Ordinal: 1
+EXPORTS-EDATA-BOTH-NEXT:     Name: func
+EXPORTS-EDATA-BOTH-NEXT:     RVA: 0x2000
+EXPORTS-EDATA-BOTH-NEXT:   }
+EXPORTS-EDATA-BOTH-NEXT: }
+
+#--- arm64-func.s
+    .section .text,"xr",discard,func
+    .globl func
+    .p2align 2
+func:
+    mov w0, #1
+    ret
+
+#--- arm64ec-func.s
+    .section .text,"xr",discard,func
+    .globl func
+    .p2align 2
+func:
+    mov w0, #2
+    ret
+
+#--- func-drectve.s
+.section .drectve
+    .ascii "-export:func"
+
+#--- edata.s
+    .section .edata, "dr"
+    .align 4
+exports:
+    .long 0           // ExportFlags
+    .long 0           // TimeDateStamp
+    .long 0           // MajorVersion + MinorVersion
+    .rva name         // NameRVA
+    .long 1           // OrdinalBase
+    .long 1           // AddressTableEntries
+    .long 1           // NumberOfNamePointers
+    .rva functions    // ExportAddressTableRVA
+    .rva names        // NamePointerRVA
+    .rva nameordinals // OrdinalTableRVA
+
+names:
+    .rva funcname_func
+
+nameordinals:
+    .short 0
+
+functions:
+    .rva func
+    .long 0
+
+funcname_func:
+    .asciz "func"
+
+name:
+    .asciz "out-edata.dll"
diff --git a/lld/test/COFF/pdata-arm64ec.test b/lld/test/COFF/pdata-arm64ec.test
index 7f20c460dc109..fbec797525f7f 100644
--- a/lld/test/COFF/pdata-arm64ec.test
+++ b/lld/test/COFF/pdata-arm64ec.test
@@ -6,6 +6,7 @@ Test handlign of hybrid .pdata section on ARM64EC target.
 RUN: llvm-mc -filetype=obj -triple=arm64-windows arm64-func-sym.s -o arm64-func-sym.obj
 RUN: llvm-mc -filetype=obj -triple=arm64ec-windows arm64ec-func-sym.s -o arm64ec-func-sym.obj
 RUN: llvm-mc -filetype=obj -triple=x86_64-windows x86_64-func-sym.s -o x86_64-func-sym.obj
+RUN: llvm-mc -filetype=obj -triple=aarch64-windows %S/Inputs/loadconfig-arm64.s -o loadconfig-arm64.obj
 RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %p/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64ec.obj
 
 Only arm64ec code:
@@ -55,11 +56,21 @@ DATA3: 180005000 00100000 11000001 00200000 0e200000
 Mixed arm64x code:
 
 RUN: lld-link -out:test4.dll -machine:arm64x arm64-func-sym.obj arm64ec-func-sym.obj \
-RUN:          x86_64-func-sym.obj loadconfig-arm64ec.obj -dll -noentry
+RUN:          x86_64-func-sym.obj loadconfig-arm64.obj loadconfig-arm64ec.obj -dll -noentry
 
 RUN: llvm-readobj --headers test4.dll | FileCheck -check-prefix=DIR3 %s
-DIR3:      ExceptionTableRVA: 0x6000
-DIR3-NEXT: ExceptionTableSize: 0x10
+DIR3:      ImageOptionalHeader {
+DIR3:        DataDirectory {
+DIR3:          ExceptionTableRVA: 0x6000
+DIR3-NEXT:     ExceptionTableSize: 0x10
+DIR3:        }
+DIR3:      }
+DIR3:      HybridObject {
+DIR3:        ImageOptionalHeader {
+DIR3:          ExceptionTableRVA: 0x6010
+DIR3-NEXT:     ExceptionTableSize: 0xC
+DIR3:        }
+DIR3:      }
 
 RUN: llvm-objdump -s --section=.pdata test4.dll | FileCheck -check-prefix=DATA4 %s
 DATA4: 180006000 00100000 11000001 00200000 11000001  ......... ......
@@ -74,12 +85,12 @@ RUN: llvm-readobj --headers test5.dll | FileCheck -check-prefix=DIR2 %s
 RUN: llvm-objdump -s --section=.pdata test5.dll | FileCheck -check-prefix=DATA3 %s
 
 RUN: lld-link -out:test6.dll -machine:arm64x arm64ec-func-sym.obj x86_64-func-sym.obj \
-RUN:          arm64-func-sym.obj loadconfig-arm64ec.obj -dll -noentry
+RUN:          arm64-func-sym.obj loadconfig-arm64.obj loadconfig-arm64ec.obj -dll -noentry
 RUN: llvm-readobj --headers test6.dll | FileCheck -check-prefix=DIR3 %s
 RUN: llvm-objdump -s --section=.pdata test6.dll | FileCheck -check-prefix=DATA4 %s
 
 RUN: lld-link -out:test7.dll -machine:arm64x x86_64-func-sym.obj arm64ec-func-sym.obj \
-RUN:          arm64-func-sym.obj loadconfig-arm64ec.obj -dll -noentry
+RUN:          arm64-func-sym.obj loadconfig-arm64.obj loadconfig-arm64ec.obj -dll -noentry
 RUN: llvm-readobj --headers test7.dll | FileCheck -check-prefix=DIR3 %s
 RUN: llvm-objdump -s --section=.pdata test7.dll | FileCheck -check-prefix=DATA4 %s
 
diff --git a/lld/test/ELF/aarch64-tlsdesc-pauth.s b/lld/test/ELF/aarch64-tlsdesc-pauth.s
new file mode 100644
index 0000000000000..bf0ae4a87f322
--- /dev/null
+++ b/lld/test/ELF/aarch64-tlsdesc-pauth.s
@@ -0,0 +1,134 @@
+// REQUIRES: aarch64
+// RUN: rm -rf %t && split-file %s %t && cd %t
+
+//--- a.s
+.section .tbss,"awT",@nobits
+.global a
+a:
+.xword 0
+
+//--- ok.s
+// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-linux -mattr=+pauth ok.s -o ok.o
+// RUN: ld.lld -shared ok.o -o ok.so
+// RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn ok.so | \
+// RUN:   FileCheck -DP=20 -DA=896 -DB=912 -DC=928 %s
+// RUN: llvm-readobj -r -x .got ok.so | FileCheck --check-prefix=REL \
+// RUN:   -DP1=20 -DA1=380 -DB1=390 -DC1=3A0 -DP2=020 -DA2=380 -DB2=390 -DC2=3a0 %s
+
+// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-linux -mattr=+pauth a.s -o a.so.o
+// RUN: ld.lld -shared a.so.o -soname=so -o a.so
+// RUN: ld.lld ok.o a.so -o ok
+// RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn ok | \
+// RUN:   FileCheck -DP=220 -DA=936 -DB=952 -DC=968 %s
+// RUN: llvm-readobj -r -x .got ok | FileCheck --check-prefix=REL \
+// RUN:   -DP1=220 -DA1=3A8 -DB1=3B8 -DC1=3C8 -DP2=220 -DA2=3a8 -DB2=3b8 -DC2=3c8 %s
+
+        .text
+        adrp    x0, :tlsdesc_auth:a
+        ldr     x16, [x0, :tlsdesc_auth_lo12:a]
+        add     x0, x0, :tlsdesc_auth_lo12:a
+        blraa   x16, x0
+
+// CHECK:      adrp    x0, 0x[[P]]000
+// CHECK-NEXT: ldr     x16, [x0, #[[A]]]
+// CHECK-NEXT: add     x0, x0, #[[A]]
+// CHECK-NEXT: blraa   x16, x0
+
+/// Create relocations against local TLS symbols. For these, the linker
+/// should create a target-specific dynamic TLSDESC relocation whose addend
+/// is the symbol's VMA in the TLS block.
+
+        adrp    x0, :tlsdesc_auth:local1
+        ldr     x16, [x0, :tlsdesc_auth_lo12:local1]
+        add     x0, x0, :tlsdesc_auth_lo12:local1
+        blraa   x16, x0
+
+// CHECK:      adrp    x0, 0x[[P]]000
+// CHECK-NEXT: ldr     x16, [x0, #[[B]]]
+// CHECK-NEXT: add     x0, x0, #[[B]]
+// CHECK-NEXT: blraa   x16, x0
+
+        adrp    x0, :tlsdesc_auth:local2
+        ldr     x16, [x0, :tlsdesc_auth_lo12:local2]
+        add     x0, x0, :tlsdesc_auth_lo12:local2
+        blraa   x16, x0
+
+// CHECK:      adrp    x0, 0x[[P]]000
+// CHECK-NEXT: ldr     x16, [x0, #[[C]]]
+// CHECK-NEXT: add     x0, x0, #[[C]]
+// CHECK-NEXT: blraa   x16, x0
+
+        .section .tbss,"awT",@nobits
+        .type   local1,@object
+        .p2align 2
+local1:
+        .word   0
+        .size   local1, 4
+
+        .type   local2,@object
+        .p2align 3
+local2:
+        .xword  0
+        .size   local2, 8
+
+
+// R_AARCH64_AUTH_TLSDESC - 0x0 -> start of tls block
+// R_AARCH64_AUTH_TLSDESC - 0x8 -> align (sizeof (local1), 8)
+
+// REL:      Relocations [
+// REL-NEXT:   Section (5) .rela.dyn {
+// REL-NEXT:     0x[[P1]][[B1]] R_AARCH64_AUTH_TLSDESC - 0x0
+// REL-NEXT:     0x[[P1]][[C1]] R_AARCH64_AUTH_TLSDESC - 0x8
+// REL-NEXT:     0x[[P1]][[A1]] R_AARCH64_AUTH_TLSDESC a 0x0
+// REL-NEXT:   }
+// REL-NEXT: ]
+
+// REL:      Hex dump of section '.got':
+// REL-NEXT: 0x00[[P2]][[A2]] 00000000 00000080 00000000 000000a0
+// REL-NEXT: 0x00[[P2]][[B2]] 00000000 00000080 00000000 000000a0
+// REL-NEXT: 0x00[[P2]][[C2]] 00000000 00000080 00000000 000000a0
+///                                          ^^
+///                                          0b10000000 bit 63 address diversity = true, bits 61..60 key = IA
+///                                                            ^^
+///                                                            0b10100000 bit 63 address diversity = true, bits 61..60 key = DA
+
+//--- err1.s
+// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-linux -mattr=+pauth err1.s -o err1.o
+// RUN: not ld.lld -shared err1.o 2>&1 | FileCheck --check-prefix=ERR1 --implicit-check-not=error: %s
+// ERR1: error: both AUTH and non-AUTH TLSDESC entries for 'a' requested, but only one type of TLSDESC entry per symbol is supported
+        .text
+        adrp    x0, :tlsdesc_auth:a
+        ldr     x16, [x0, :tlsdesc_auth_lo12:a]
+        add     x0, x0, :tlsdesc_auth_lo12:a
+        blraa   x16, x0
+
+        adrp    x0, :tlsdesc:a
+        ldr     x1, [x0, :tlsdesc_lo12:a]
+        add     x0, x0, :tlsdesc_lo12:a
+        blr     x1
+
+//--- err2.s
+// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-linux -mattr=+pauth err2.s -o err2.o
+// RUN: not ld.lld -shared err2.o 2>&1 | FileCheck --check-prefix=ERR2 --implicit-check-not=error: %s
+// ERR2: error: both AUTH and non-AUTH TLSDESC entries for 'a' requested, but only one type of TLSDESC entry per symbol is supported
+        .text
+        adrp    x0, :tlsdesc:a
+        ldr     x1, [x0, :tlsdesc_lo12:a]
+        add     x0, x0, :tlsdesc_lo12:a
+        blr     x1
+
+        adrp    x0, :tlsdesc_auth:a
+        ldr     x16, [x0, :tlsdesc_auth_lo12:a]
+        add     x0, x0, :tlsdesc_auth_lo12:a
+        blraa   x16, x0
+
+//--- err3.s
+// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-linux -mattr=+pauth err3.s -o err3.o
+// RUN: not ld.lld -shared err3.o 2>&1 | FileCheck --check-prefix=ERR3 --implicit-check-not=error: %s
+// ERR3: error: both AUTH and non-AUTH TLSDESC entries for 'a' requested, but only one type of TLSDESC entry per symbol is supported
+        .text
+        adrp    x0, :tlsdesc_auth:a
+        ldr     x16, [x0, :tlsdesc_auth_lo12:a]
+        add     x0, x0, :tlsdesc_auth_lo12:a
+        .tlsdesccall a
+        blraa   x16, x0
diff --git a/lld/test/ELF/linkerscript/diag.test b/lld/test/ELF/linkerscript/diag.test
index fbc24659a5311..114f5c2c0820b 100644
--- a/lld/test/ELF/linkerscript/diag.test
+++ b/lld/test/ELF/linkerscript/diag.test
@@ -12,9 +12,9 @@ SECTIONS {
 }
 
 # RUN: not ld.lld -shared 0.o -T 1.lds 2>&1 | FileCheck %s --check-prefix=CHECK1 --match-full-lines --strict-whitespace
-#      CHECK1:{{.*}}:2: malformed number: +
+#      CHECK1:{{.*}}:2: malformed number: {
 # CHECK1-NEXT:>>>   .text + { *(.text) }
-# CHECK1-NEXT:>>>         ^
+# CHECK1-NEXT:>>>           ^
 
 #--- 2.lds
 
diff --git a/lld/test/ELF/linkerscript/operators.test b/lld/test/ELF/linkerscript/operators.test
index 27209a2e40f59..f84f23fd4d469 100644
--- a/lld/test/ELF/linkerscript/operators.test
+++ b/lld/test/ELF/linkerscript/operators.test
@@ -73,6 +73,8 @@ SECTIONS {
   log2ceil100000000 = LOG2CEIL(0x100000000);
   log2ceil100000001 = LOG2CEIL(0x100000001);
   log2ceilmax = LOG2CEIL(0xffffffffffffffff);
+  unaryadd = +3 + ++5;
+  unaryadd_and_unaryminus = 15 + +-5 + -+7;
 }
 
 # CHECK:      0000000000000002 A unary
@@ -126,6 +128,8 @@ SECTIONS {
 # CHECK-NEXT: 0000000000000020 A log2ceil100000000
 # CHECK-NEXT: 0000000000000021 A log2ceil100000001
 # CHECK-NEXT: 0000000000000040 A log2ceilmax
+# CHECK-NEXT: 0000000000000008 A unaryadd
+# CHECK-NEXT: 0000000000000003 A unaryadd_and_unaryminus
 
 ## Mailformed number error.
 # RUN: echo "SECTIONS { . = 0x12Q41; }" > %t.script
diff --git a/lldb/include/lldb/API/SBDebugger.h b/lldb/include/lldb/API/SBDebugger.h
index eb371e33c4951..e0819f1684f8b 100644
--- a/lldb/include/lldb/API/SBDebugger.h
+++ b/lldb/include/lldb/API/SBDebugger.h
@@ -42,12 +42,16 @@ class LLDB_API SBInputReader {
 
 class LLDB_API SBDebugger {
 public:
-  FLAGS_ANONYMOUS_ENUM(){
-      eBroadcastBitProgress = lldb::DebuggerBroadcastBit::eBroadcastBitProgress,
-      eBroadcastBitWarning = lldb::DebuggerBroadcastBit::eBroadcastBitWarning,
-      eBroadcastBitError = lldb::DebuggerBroadcastBit::eBroadcastBitError,
-      eBroadcastBitProgressCategory =
-          lldb::DebuggerBroadcastBit::eBroadcastBitProgressCategory,
+  FLAGS_ANONYMOUS_ENUM() {
+    eBroadcastBitProgress = lldb::DebuggerBroadcastBit::eBroadcastBitProgress,
+    eBroadcastBitWarning = lldb::DebuggerBroadcastBit::eBroadcastBitWarning,
+    eBroadcastBitError = lldb::DebuggerBroadcastBit::eBroadcastBitError,
+    eBroadcastBitProgressCategory =
+        lldb::DebuggerBroadcastBit::eBroadcastBitProgressCategory,
+    eBroadcastBitExternalProgress =
+        lldb::DebuggerBroadcastBit::eBroadcastBitExternalProgress,
+    eBroadcastBitExternalProgressCategory =
+        lldb::DebuggerBroadcastBit::eBroadcastBitExternalProgressCategory,
   };
   SBDebugger();
 
diff --git a/lldb/include/lldb/Host/Editline.h b/lldb/include/lldb/Host/Editline.h
index 26deba38f8471..27b863870090c 100644
--- a/lldb/include/lldb/Host/Editline.h
+++ b/lldb/include/lldb/Host/Editline.h
@@ -152,7 +152,7 @@ using namespace line_editor;
 class Editline {
 public:
   Editline(const char *editor_name, FILE *input_file, FILE *output_file,
-           FILE *error_file, std::recursive_mutex &output_mutex);
+           FILE *error_file, bool color, std::recursive_mutex &output_mutex);
 
   ~Editline();
 
@@ -212,19 +212,23 @@ class Editline {
   }
 
   void SetPromptAnsiPrefix(std::string prefix) {
-    m_prompt_ansi_prefix = std::move(prefix);
+    if (m_color)
+      m_prompt_ansi_prefix = std::move(prefix);
   }
 
   void SetPromptAnsiSuffix(std::string suffix) {
-    m_prompt_ansi_suffix = std::move(suffix);
+    if (m_color)
+      m_prompt_ansi_suffix = std::move(suffix);
   }
 
   void SetSuggestionAnsiPrefix(std::string prefix) {
-    m_suggestion_ansi_prefix = std::move(prefix);
+    if (m_color)
+      m_suggestion_ansi_prefix = std::move(prefix);
   }
 
   void SetSuggestionAnsiSuffix(std::string suffix) {
-    m_suggestion_ansi_suffix = std::move(suffix);
+    if (m_color)
+      m_suggestion_ansi_suffix = std::move(suffix);
   }
 
   /// Prompts for and reads a single line of user input.
@@ -400,6 +404,7 @@ class Editline {
   CompleteCallbackType m_completion_callback;
   SuggestionCallbackType m_suggestion_callback;
 
+  bool m_color;
   std::string m_prompt_ansi_prefix;
   std::string m_prompt_ansi_suffix;
   std::string m_suggestion_ansi_prefix;
diff --git a/lldb/include/lldb/Utility/AnsiTerminal.h b/lldb/include/lldb/Utility/AnsiTerminal.h
index 67795971d2ca8..1939c49c7b859 100644
--- a/lldb/include/lldb/Utility/AnsiTerminal.h
+++ b/lldb/include/lldb/Utility/AnsiTerminal.h
@@ -171,7 +171,32 @@ inline std::string FormatAnsiTerminalCodes(llvm::StringRef format,
   }
   return fmt;
 }
+
+inline std::string StripAnsiTerminalCodes(llvm::StringRef str) {
+  std::string stripped; // Copy of str minus recognized escape sequences.
+  while (!str.empty()) {
+    llvm::StringRef left, right;
+
+    std::tie(left, right) = str.split(ANSI_ESC_START);
+    stripped += left; // Text before the escape start is always kept.
+
+    // ANSI_ESC_START not found: left already holds the rest of the input.
+    if (left == str && right.empty())
+      break;
+
+    size_t end = right.find_first_not_of("0123456789;"); // Skip parameters.
+    if (end < right.size() && (right[end] == 'm' || right[end] == 'G')) {
+      str = right.substr(end + 1); // Drop the sequence incl. its final byte.
+    } else {
+      // No 'm' or 'G' terminator: keep ANSI_ESC_START as literal text.
+      stripped += ANSI_ESC_START;
+      str = right;
+    }
+  }
+  return stripped;
 }
+
+} // namespace ansi
 } // namespace lldb_private
 
 #endif
diff --git a/lldb/source/Core/IOHandler.cpp b/lldb/source/Core/IOHandler.cpp
index 695c2481e353d..ca06b52b874db 100644
--- a/lldb/source/Core/IOHandler.cpp
+++ b/lldb/source/Core/IOHandler.cpp
@@ -264,7 +264,7 @@ IOHandlerEditline::IOHandlerEditline(
   if (use_editline) {
     m_editline_up = std::make_unique<Editline>(editline_name, GetInputFILE(),
                                                GetOutputFILE(), GetErrorFILE(),
-                                               GetOutputMutex());
+                                               m_color, GetOutputMutex());
     m_editline_up->SetIsInputCompleteCallback(
         [this](Editline *editline, StringList &lines) {
           return this->IsInputCompleteCallback(editline, lines);
@@ -278,12 +278,10 @@ IOHandlerEditline::IOHandlerEditline(
       m_editline_up->SetSuggestionCallback([this](llvm::StringRef line) {
         return this->SuggestionCallback(line);
       });
-      if (m_color) {
-        m_editline_up->SetSuggestionAnsiPrefix(ansi::FormatAnsiTerminalCodes(
-            debugger.GetAutosuggestionAnsiPrefix()));
-        m_editline_up->SetSuggestionAnsiSuffix(ansi::FormatAnsiTerminalCodes(
-            debugger.GetAutosuggestionAnsiSuffix()));
-      }
+      m_editline_up->SetSuggestionAnsiPrefix(ansi::FormatAnsiTerminalCodes(
+          debugger.GetAutosuggestionAnsiPrefix()));
+      m_editline_up->SetSuggestionAnsiSuffix(ansi::FormatAnsiTerminalCodes(
+          debugger.GetAutosuggestionAnsiSuffix()));
     }
     // See if the delegate supports fixing indentation
     const char *indent_chars = delegate.IOHandlerGetFixIndentationCharacters();
@@ -478,12 +476,10 @@ bool IOHandlerEditline::SetPrompt(llvm::StringRef prompt) {
 #if LLDB_ENABLE_LIBEDIT
   if (m_editline_up) {
     m_editline_up->SetPrompt(m_prompt.empty() ? nullptr : m_prompt.c_str());
-    if (m_color) {
-      m_editline_up->SetPromptAnsiPrefix(
-          ansi::FormatAnsiTerminalCodes(m_debugger.GetPromptAnsiPrefix()));
-      m_editline_up->SetPromptAnsiSuffix(
-          ansi::FormatAnsiTerminalCodes(m_debugger.GetPromptAnsiSuffix()));
-    }
+    m_editline_up->SetPromptAnsiPrefix(
+        ansi::FormatAnsiTerminalCodes(m_debugger.GetPromptAnsiPrefix()));
+    m_editline_up->SetPromptAnsiSuffix(
+        ansi::FormatAnsiTerminalCodes(m_debugger.GetPromptAnsiSuffix()));
   }
 #endif
   return true;
diff --git a/lldb/source/Host/common/Editline.cpp b/lldb/source/Host/common/Editline.cpp
index 6e35b15d69651..73da1d8481618 100644
--- a/lldb/source/Host/common/Editline.cpp
+++ b/lldb/source/Host/common/Editline.cpp
@@ -14,6 +14,7 @@
 #include "lldb/Host/Editline.h"
 #include "lldb/Host/FileSystem.h"
 #include "lldb/Host/Host.h"
+#include "lldb/Utility/AnsiTerminal.h"
 #include "lldb/Utility/CompletionRequest.h"
 #include "lldb/Utility/FileSpec.h"
 #include "lldb/Utility/LLDBAssert.h"
@@ -85,7 +86,8 @@ bool IsOnlySpaces(const EditLineStringType &content) {
 }
 
 static size_t ColumnWidth(llvm::StringRef str) {
-  return llvm::sys::locale::columnWidth(str);
+  std::string stripped = ansi::StripAnsiTerminalCodes(str);
+  return llvm::sys::locale::columnWidth(stripped); // Escape codes excluded.
 }
 
 static int GetOperation(HistoryOperation op) {
@@ -610,7 +612,7 @@ int Editline::GetCharacter(EditLineGetCharType *c) {
 }
 
 const char *Editline::Prompt() {
-  if (!m_prompt_ansi_prefix.empty() || !m_prompt_ansi_suffix.empty())
+  if (m_color)
     m_needs_prompt_repaint = true;
   return m_current_prompt.c_str();
 }
@@ -1471,11 +1473,11 @@ Editline *Editline::InstanceFor(EditLine *editline) {
 }
 
 Editline::Editline(const char *editline_name, FILE *input_file,
-                   FILE *output_file, FILE *error_file,
+                   FILE *output_file, FILE *error_file, bool color,
                    std::recursive_mutex &output_mutex)
     : m_editor_status(EditorStatus::Complete), m_input_file(input_file),
       m_output_file(output_file), m_error_file(error_file),
-      m_input_connection(fileno(input_file), false),
+      m_input_connection(fileno(input_file), false), m_color(color),
       m_output_mutex(output_mutex) {
   // Get a shared history instance
   m_editor_name = (editline_name == nullptr) ? "lldb-tmp" : editline_name;
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
index fb3af44abfa8d..81a1375c03718 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -3084,51 +3084,10 @@ size_t DWARFASTParserClang::ParseChildParameters(
     const dw_tag_t tag = die.Tag();
     switch (tag) {
     case DW_TAG_formal_parameter: {
-      DWARFAttributes attributes = die.GetAttributes();
-      if (attributes.Size() == 0) {
-        arg_idx++;
-        break;
-      }
-
-      const char *name = nullptr;
-      DWARFFormValue param_type_die_form;
-      bool is_artificial = false;
-      // one of None, Auto, Register, Extern, Static, PrivateExtern
-
-      clang::StorageClass storage = clang::SC_None;
-      uint32_t i;
-      for (i = 0; i < attributes.Size(); ++i) {
-        const dw_attr_t attr = attributes.AttributeAtIndex(i);
-        DWARFFormValue form_value;
-        if (attributes.ExtractFormValueAtIndex(i, form_value)) {
-          switch (attr) {
-          case DW_AT_name:
-            name = form_value.AsCString();
-            break;
-          case DW_AT_type:
-            param_type_die_form = form_value;
-            break;
-          case DW_AT_artificial:
-            is_artificial = form_value.Boolean();
-            break;
-          case DW_AT_location:
-          case DW_AT_const_value:
-          case DW_AT_default_value:
-          case DW_AT_description:
-          case DW_AT_endianity:
-          case DW_AT_is_optional:
-          case DW_AT_segment:
-          case DW_AT_variable_parameter:
-          default:
-          case DW_AT_abstract_origin:
-          case DW_AT_sibling:
-            break;
-          }
-        }
-      }
+      const char *name = die.GetName();
+      DWARFDIE param_type_die = die.GetAttributeValueAsReferenceDIE(DW_AT_type);
 
-      bool skip = false;
-      if (is_artificial) {
+      if (die.GetAttributeValueAsUnsigned(DW_AT_artificial, 0)) {
         // In order to determine if a C++ member function is "const" we
         // have to look at the const-ness of "this"...
         if (arg_idx == 0 &&
@@ -3137,8 +3096,7 @@ size_t DWARFASTParserClang::ParseChildParameters(
             // specification DIEs, so we can't rely upon the name being in
             // the formal parameter DIE...
             (name == nullptr || ::strcmp(name, "this") == 0)) {
-          Type *this_type = die.ResolveTypeUID(param_type_die_form.Reference());
-          if (this_type) {
+          if (Type *this_type = die.ResolveTypeUID(param_type_die)) {
             uint32_t encoding_mask = this_type->GetEncodingMask();
             if (encoding_mask & Type::eEncodingIsPointerUID) {
               is_static = false;
@@ -3150,23 +3108,18 @@ size_t DWARFASTParserClang::ParseChildParameters(
             }
           }
         }
-        skip = true;
-      }
-
-      if (!skip) {
-        Type *type = die.ResolveTypeUID(param_type_die_form.Reference());
-        if (type) {
-          function_param_types.push_back(type->GetForwardCompilerType());
+      } else if (Type *type = die.ResolveTypeUID(param_type_die)) {
+        function_param_types.push_back(type->GetForwardCompilerType());
 
-          clang::ParmVarDecl *param_var_decl = m_ast.CreateParameterDeclaration(
-              containing_decl_ctx, GetOwningClangModule(die), name,
-              type->GetForwardCompilerType(), storage);
-          assert(param_var_decl);
-          function_param_decls.push_back(param_var_decl);
+        clang::ParmVarDecl *param_var_decl = m_ast.CreateParameterDeclaration(
+            containing_decl_ctx, GetOwningClangModule(die), name,
+            type->GetForwardCompilerType(), clang::StorageClass::SC_None);
+        assert(param_var_decl);
+        function_param_decls.push_back(param_var_decl);
 
-          m_ast.SetMetadataAsUserID(param_var_decl, die.GetID());
-        }
+        m_ast.SetMetadataAsUserID(param_var_decl, die.GetID());
       }
+
       arg_idx++;
     } break;
 
diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp
index c47e728fdf716..89731f798deda 100644
--- a/lldb/source/Target/Process.cpp
+++ b/lldb/source/Target/Process.cpp
@@ -1292,17 +1292,12 @@ bool Process::HasAssignedIndexIDToThread(uint64_t thread_id) {
 }
 
 uint32_t Process::AssignIndexIDToThread(uint64_t thread_id) {
-  uint32_t result = 0;
-  std::map<uint64_t, uint32_t>::iterator iterator =
-      m_thread_id_to_index_id_map.find(thread_id);
-  if (iterator == m_thread_id_to_index_id_map.end()) {
-    result = ++m_thread_index_id;
-    m_thread_id_to_index_id_map[thread_id] = result;
-  } else {
-    result = iterator->second;
-  }
+  auto [iterator, inserted] =
+      m_thread_id_to_index_id_map.try_emplace(thread_id, m_thread_index_id + 1);
+  if (inserted)
+    ++m_thread_index_id;
 
-  return result;
+  return iterator->second;
 }
 
 StateType Process::GetState() {
diff --git a/lldb/test/API/terminal/TestEditline.py b/lldb/test/API/terminal/TestEditline.py
index aa7d827e59944..ddaa441d5f7c1 100644
--- a/lldb/test/API/terminal/TestEditline.py
+++ b/lldb/test/API/terminal/TestEditline.py
@@ -2,7 +2,6 @@
 Test that the lldb editline handling is configured correctly.
 """
 
-
 import lldb
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
@@ -69,6 +68,22 @@ def test_prompt_color(self):
         # Column: 1....6.8
         self.child.expect(re.escape("\x1b[31m(lldb) \x1b[0m\x1b[8G"))
 
+    @skipIfAsan
+    @skipIfEditlineSupportMissing
+    def test_prompt_format_color(self):
+        """Test that we can change the prompt color with a format string."""
+        self.launch(use_colors=True)
+        # Clear the prefix and suffix setting to simplify the output.
+        self.expect('settings set prompt-ansi-prefix ""')
+        self.expect('settings set prompt-ansi-suffix ""')
+        self.expect('settings set prompt "${ansi.fg.red}(lldb) ${ansi.normal}"')
+        self.child.send("foo")
+        # Make sure this change is reflected immediately. Check that the color
+        # is set (31) and the cursor position (8) is correct.
+        # Prompt: (lldb) _
+        # Column: 1....6.8
+        self.child.expect(re.escape("\x1b[31m(lldb) \x1b[0m\x1b[8Gfoo"))
+
     @skipIfAsan
     @skipIfEditlineSupportMissing
     def test_prompt_no_color(self):
diff --git a/lldb/tools/lldb-dap/lldb-dap.cpp b/lldb/tools/lldb-dap/lldb-dap.cpp
index 7e8f7b5f6df67..6b12569d90a83 100644
--- a/lldb/tools/lldb-dap/lldb-dap.cpp
+++ b/lldb/tools/lldb-dap/lldb-dap.cpp
@@ -414,7 +414,8 @@ void SendStdOutStdErr(DAP &dap, lldb::SBProcess &process) {
 void ProgressEventThreadFunction(DAP &dap) {
   lldb::SBListener listener("lldb-dap.progress.listener");
   dap.debugger.GetBroadcaster().AddListener(
-      listener, lldb::SBDebugger::eBroadcastBitProgress);
+      listener, lldb::SBDebugger::eBroadcastBitProgress |
+                    lldb::SBDebugger::eBroadcastBitExternalProgress);
   dap.broadcaster.AddListener(listener, eBroadcastBitStopProgressThread);
   lldb::SBEvent event;
   bool done = false;
diff --git a/lldb/unittests/Editline/EditlineTest.cpp b/lldb/unittests/Editline/EditlineTest.cpp
index 333ad77a0a16f..1327b587e7c3d 100644
--- a/lldb/unittests/Editline/EditlineTest.cpp
+++ b/lldb/unittests/Editline/EditlineTest.cpp
@@ -118,7 +118,7 @@ EditlineAdapter::EditlineAdapter()
   // Create an Editline instance.
   _editline_sp.reset(new lldb_private::Editline(
       "gtest editor", *_el_secondary_file, *_el_secondary_file,
-      *_el_secondary_file, output_mutex));
+      *_el_secondary_file, /*color=*/false, output_mutex));
   _editline_sp->SetPrompt("> ");
 
   // Hookup our input complete callback.
diff --git a/lldb/unittests/Utility/AnsiTerminalTest.cpp b/lldb/unittests/Utility/AnsiTerminalTest.cpp
index a6dbfd6106142..1ba9565c3f6af 100644
--- a/lldb/unittests/Utility/AnsiTerminalTest.cpp
+++ b/lldb/unittests/Utility/AnsiTerminalTest.cpp
@@ -16,16 +16,21 @@ TEST(AnsiTerminal, Empty) { EXPECT_EQ("", ansi::FormatAnsiTerminalCodes("")); }
 
 TEST(AnsiTerminal, WhiteSpace) {
   EXPECT_EQ(" ", ansi::FormatAnsiTerminalCodes(" "));
+  EXPECT_EQ(" ", ansi::StripAnsiTerminalCodes(" "));
 }
 
 TEST(AnsiTerminal, AtEnd) {
   EXPECT_EQ("abc\x1B[30m",
             ansi::FormatAnsiTerminalCodes("abc${ansi.fg.black}"));
+
+  EXPECT_EQ("abc", ansi::StripAnsiTerminalCodes("abc\x1B[30m"));
 }
 
 TEST(AnsiTerminal, AtStart) {
   EXPECT_EQ("\x1B[30mabc",
             ansi::FormatAnsiTerminalCodes("${ansi.fg.black}abc"));
+
+  EXPECT_EQ("abc", ansi::StripAnsiTerminalCodes("\x1B[30mabc"));
 }
 
 TEST(AnsiTerminal, KnownPrefix) {
@@ -45,10 +50,20 @@ TEST(AnsiTerminal, Incomplete) {
 TEST(AnsiTerminal, Twice) {
   EXPECT_EQ("\x1B[30m\x1B[31mabc",
             ansi::FormatAnsiTerminalCodes("${ansi.fg.black}${ansi.fg.red}abc"));
+
+  EXPECT_EQ("abc", ansi::StripAnsiTerminalCodes("\x1B[30m\x1B[31mabc"));
 }
 
 TEST(AnsiTerminal, Basic) {
   EXPECT_EQ(
       "abc\x1B[31mabc\x1B[0mabc",
       ansi::FormatAnsiTerminalCodes("abc${ansi.fg.red}abc${ansi.normal}abc"));
+
+  EXPECT_EQ("abcabcabc",
+            ansi::StripAnsiTerminalCodes("abc\x1B[31mabc\x1B[0mabc"));
+}
+
+TEST(AnsiTerminal, InvalidEscapeCode) {
+  EXPECT_EQ("abc\x1B[31kabcabc",
+            ansi::StripAnsiTerminalCodes("abc\x1B[31kabc\x1B[0mabc"));
 }
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index f14065ab03799..ad12100fdb5b8 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -486,6 +486,7 @@ set(LLVM_ALL_TARGETS
   PowerPC
   RISCV
   Sparc
+  SPIRV
   SystemZ
   VE
   WebAssembly
@@ -498,7 +499,6 @@ set(LLVM_ALL_EXPERIMENTAL_TARGETS
   CSKY
   DirectX
   M68k
-  SPIRV
   Xtensa
 )
 
diff --git a/llvm/Maintainers.md b/llvm/Maintainers.md
index 56457946136f0..534d81e68d024 100644
--- a/llvm/Maintainers.md
+++ b/llvm/Maintainers.md
@@ -284,8 +284,11 @@ koachan@protonmail.com (email), [koachan](https://github.com/koachan) (GitHub)
 
 #### SPIRV backend
 
-Ilia Diachkov \
-ilia.diachkov@gmail.com (email), [iliya-diyachkov](https://github.com/iliya-diyachkov) (GitHub)
+Vyacheslav Levytskyy \
+vyacheslav.levytskyy@intel.com, vyacheslav.levytskyy@gmail.com (email), [VyacheslavLevytskyy](https://github.com/VyacheslavLevytskyy) (GitHub)
+
+Nathan Gauër \
+brioche@google.com (email), [Keenuts](https://github.com/Keenuts) (GitHub)
 
 #### SystemZ backend
 
diff --git a/llvm/docs/DeveloperPolicy.rst b/llvm/docs/DeveloperPolicy.rst
index 18b05d2e58e6e..5b1f7dc29bee3 100644
--- a/llvm/docs/DeveloperPolicy.rst
+++ b/llvm/docs/DeveloperPolicy.rst
@@ -1154,6 +1154,18 @@ acceptable for their contributions.  We feel that a high burden for relicensing
 is good for the project, because contributors do not have to fear that their
 code will be used in a way with which they disagree.
 
+Embedded Copyright or 'Contributed by' Statements
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The LLVM project does not accept contributions that include in-source copyright
+notices except where such notices are part of a larger external project being
+added as a vendored dependency.
+
+LLVM source code lives for a long time and is edited by many people; the best
+way to track contributions is through revision control history.
+See the `Attribution of Changes`_ section for more information about attributing
+changes to authors other than the committer.
+
 Relicensing
 -----------
 
diff --git a/llvm/docs/Docker.rst b/llvm/docs/Docker.rst
index 6b3c80da684b4..5d976eddb3130 100644
--- a/llvm/docs/Docker.rst
+++ b/llvm/docs/Docker.rst
@@ -9,7 +9,7 @@ You can find a number of sources to build docker images with LLVM components in
 images for their own use, or as a starting point for someone who wants to write
 their own Dockerfiles.
 
-We currently provide Dockerfiles with ``debian10`` and ``nvidia-cuda`` base images.
+We currently provide Dockerfiles with ``debian12`` and ``nvidia-cuda`` base images.
 We also provide an ``example`` image, which contains placeholders that one would need
 to fill out in order to produce Dockerfiles for a new docker image.
 
@@ -72,13 +72,13 @@ checkout from git and provide a list of CMake arguments to use during when
 building LLVM inside docker container.
 
 Here's a very simple example of getting a docker image with clang binary,
-compiled by the system compiler in the debian10 image:
+compiled by the system compiler in the debian12 image:
 
 .. code-block:: bash
 
     ./llvm/utils/docker/build_docker_image.sh \
-	--source debian10 \
-	--docker-repository clang-debian10 --docker-tag "staging" \
+	--source debian12 \
+	--docker-repository clang-debian12 --docker-tag "staging" \
 	-p clang -i install-clang -i install-clang-resource-headers \
 	-- \
 	-DCMAKE_BUILD_TYPE=Release
@@ -93,51 +93,45 @@ this command will do that:
     #   LLVM_TARGETS_TO_BUILD=Native is to reduce stage1 compile time.
     #   Options, starting with BOOTSTRAP_* are passed to stage2 cmake invocation.
     ./build_docker_image.sh \
-	--source debian10 \
-	--docker-repository clang-debian10 --docker-tag "staging" \
+	--source debian12 \
+	--docker-repository clang-debian12 --docker-tag "staging" \
 	-p clang -i stage2-install-clang -i stage2-install-clang-resource-headers \
 	-- \
 	-DLLVM_TARGETS_TO_BUILD=Native -DCMAKE_BUILD_TYPE=Release \
 	-DBOOTSTRAP_CMAKE_BUILD_TYPE=Release \
 	-DCLANG_ENABLE_BOOTSTRAP=ON -DCLANG_BOOTSTRAP_TARGETS="install-clang;install-clang-resource-headers"
 	
-This will produce a new image ``clang-debian10:staging`` from the latest
+This will produce a new image ``clang-debian12:staging`` from the latest
 upstream revision.
 After the image is built you can run bash inside a container based on your image
 like this:
 
 .. code-block:: bash
 
-    docker run -ti clang-debian10:staging bash
+    docker run -ti clang-debian12:staging bash
 
 Now you can run bash commands as you normally would:
 
 .. code-block:: bash
 
     root@80f351b51825:/# clang -v
-    clang version 5.0.0 (trunk 305064)
+    clang version 19.1.7 (trunk 524462)
+    Target: x86_64-unknown-linux-gnu
     Target: x86_64-unknown-linux-gnu
     Thread model: posix
     InstalledDir: /bin
-    Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.8
-    Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.8.4
-    Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.9
-    Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.9.2
-    Selected GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.9
-    Candidate multilib: .;@m64
-    Selected multilib: .;@m64
 
 
 Which image should I choose?
 ============================
-We currently provide two images: Debian10-based and nvidia-cuda-based. They
+We currently provide two images: Debian12-based and nvidia-cuda-based. They
 differ in the base image that they use, i.e. they have a different set of
 preinstalled binaries. Debian8 is very minimal, nvidia-cuda is larger, but has
 preinstalled CUDA libraries and allows to access a GPU, installed on your
 machine.
 
 If you need a minimal linux distribution with only clang and libstdc++ included,
-you should try Debian10-based image.
+you should try Debian12-based image.
 
 If you want to use CUDA libraries and have access to a GPU on your machine,
 you should choose nvidia-cuda-based image and use `nvidia-docker
@@ -150,7 +144,7 @@ If you have a different use-case, you could create your own image based on
 ``example/`` folder.
 
 Any docker image can be built and run using only the docker binary, i.e. you can
-run debian10 build on Fedora or any other Linux distribution. You don't need to
+run debian12 build on Fedora or any other Linux distribution. You don't need to
 install CMake, compilers or any other clang dependencies. It is all handled
 during the build process inside Docker's isolated environment.
 
@@ -158,12 +152,12 @@ Stable build
 ============
 If you want a somewhat recent and somewhat stable build, use the
 ``branches/google/stable`` branch, i.e. the following command will produce a
-Debian10-based image using the latest ``google/stable`` sources for you:
+Debian12-based image using the latest ``google/stable`` sources for you:
 
 .. code-block:: bash
 
     ./llvm/utils/docker/build_docker_image.sh \
-	-s debian10 --d clang-debian10 -t "staging" \
+	-s debian12 --d clang-debian12 -t "staging" \
 	--branch branches/google/stable \
 	-p clang -i install-clang -i install-clang-resource-headers \
 	-- \
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 8cc9036d1b67f..b922636d6c914 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -4776,8 +4776,8 @@ allowing the '``or``' to be folded to -1.
       %B = undef
       %C = undef
 
-This set of examples shows that undefined '``select``' (and conditional
-branch) conditions can go *either way*, but they have to come from one
+This set of examples shows that undefined '``select``'
+conditions can go *either way*, but they have to come from one
 of the two operands. In the ``%A`` example, if ``%X`` and ``%Y`` were
 both known to have a clear low bit, then ``%A`` would have to have a
 cleared low bit. However, in the ``%C`` example, the optimizer is
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index 8f88b824f965a..50cdaafcb3bb7 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -47,6 +47,12 @@ for adding a new subsection. -->
   same semantics. The normalizer makes it easier to spot semantic differences
   when diffing two modules which have undergone different passes.
 
+* The SPIR-V backend is now an official LLVM target, providing OpenCL and SYCL
+  conformance and establishing a foundation for broader applicability to other
+  APIs, including Vulkan, GLSL, and HLSL. This backend aims to offer a unified
+  approach for diverse compute and graphics workloads, providing a robust
+  alternative to the Khronos SPIR-V LLVM Translator.
+
 * ...
 
 <!-- If you would like to document a larger change, then you can add a
@@ -482,6 +488,11 @@ Changes to LLDB
   that port to the connection handler processes. This means that only 2 ports need
   to be opened in the firewall (one for the `lldb-server` platform, one for gdbserver connections).
   In addition, due to this work, `lldb-server` now works on Windows in the server mode.
+  
+* LLDB now supports executing user expressions on RISC-V targets in non-trivial cases, such as function calls, where code needs to be executed on the target.
+
+* LLDB now supports register sets that can be optionally enabled or disabled (particularly floating point registers) for RISC-V 64. This applies to targets like `RV64IMAC` or `RV64IMACV`,
+  which have no floating point registers. The change applies to both native debugging and core-file usage.
 
 Changes to BOLT
 ---------------------------------
diff --git a/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter1/toy.cpp b/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter1/toy.cpp
index 1b35ba404d29b..426886c72e54d 100644
--- a/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter1/toy.cpp
+++ b/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter1/toy.cpp
@@ -1159,7 +1159,7 @@ static void HandleTopLevelExpression() {
 
       // Get the symbol's address and cast it to the right type (takes no
       // arguments, returns a double) so we can call it as a native function.
-      auto *FP = Sym.getAddress().toPtr<double (*)()>();
+      auto *FP = Sym.toPtr<double (*)()>();
       fprintf(stderr, "Evaluated to %f\n", FP());
 
       // Delete the anonymous expression module from the JIT.
diff --git a/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter2/toy.cpp b/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter2/toy.cpp
index 1b35ba404d29b..426886c72e54d 100644
--- a/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter2/toy.cpp
+++ b/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter2/toy.cpp
@@ -1159,7 +1159,7 @@ static void HandleTopLevelExpression() {
 
       // Get the symbol's address and cast it to the right type (takes no
       // arguments, returns a double) so we can call it as a native function.
-      auto *FP = Sym.getAddress().toPtr<double (*)()>();
+      auto *FP = Sym.toPtr<double (*)()>();
       fprintf(stderr, "Evaluated to %f\n", FP());
 
       // Delete the anonymous expression module from the JIT.
diff --git a/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter3/toy.cpp b/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter3/toy.cpp
index 1b35ba404d29b..426886c72e54d 100644
--- a/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter3/toy.cpp
+++ b/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter3/toy.cpp
@@ -1159,7 +1159,7 @@ static void HandleTopLevelExpression() {
 
       // Get the symbol's address and cast it to the right type (takes no
       // arguments, returns a double) so we can call it as a native function.
-      auto *FP = Sym.getAddress().toPtr<double (*)()>();
+      auto *FP = Sym.toPtr<double (*)()>();
       fprintf(stderr, "Evaluated to %f\n", FP());
 
       // Delete the anonymous expression module from the JIT.
diff --git a/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter4/toy.cpp b/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter4/toy.cpp
index 2c8d4941291e0..1891635dbfd35 100644
--- a/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter4/toy.cpp
+++ b/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter4/toy.cpp
@@ -1157,7 +1157,7 @@ static void HandleTopLevelExpression() {
 
       // Get the symbol's address and cast it to the right type (takes no
       // arguments, returns a double) so we can call it as a native function.
-      auto *FP = Sym.getAddress().toPtr<double (*)()>();
+      auto *FP = Sym.toPtr<double (*)()>();
       fprintf(stderr, "Evaluated to %f\n", FP());
 
       // Delete the anonymous expression module from the JIT.
diff --git a/llvm/examples/Kaleidoscope/Chapter4/toy.cpp b/llvm/examples/Kaleidoscope/Chapter4/toy.cpp
index 1bbc294bf3526..0f58391c50667 100644
--- a/llvm/examples/Kaleidoscope/Chapter4/toy.cpp
+++ b/llvm/examples/Kaleidoscope/Chapter4/toy.cpp
@@ -643,7 +643,7 @@ static void HandleTopLevelExpression() {
 
       // Get the symbol's address and cast it to the right type (takes no
       // arguments, returns a double) so we can call it as a native function.
-      double (*FP)() = ExprSymbol.getAddress().toPtr<double (*)()>();
+      double (*FP)() = ExprSymbol.toPtr<double (*)()>();
       fprintf(stderr, "Evaluated to %f\n", FP());
 
       // Delete the anonymous expression module from the JIT.
diff --git a/llvm/examples/Kaleidoscope/Chapter5/toy.cpp b/llvm/examples/Kaleidoscope/Chapter5/toy.cpp
index 48936bddb1d4f..7117eaf4982b0 100644
--- a/llvm/examples/Kaleidoscope/Chapter5/toy.cpp
+++ b/llvm/examples/Kaleidoscope/Chapter5/toy.cpp
@@ -917,7 +917,7 @@ static void HandleTopLevelExpression() {
 
       // Get the symbol's address and cast it to the right type (takes no
       // arguments, returns a double) so we can call it as a native function.
-      double (*FP)() = ExprSymbol.getAddress().toPtr<double (*)()>();
+      double (*FP)() = ExprSymbol.toPtr<double (*)()>();
       fprintf(stderr, "Evaluated to %f\n", FP());
 
       // Delete the anonymous expression module from the JIT.
diff --git a/llvm/examples/Kaleidoscope/Chapter6/toy.cpp b/llvm/examples/Kaleidoscope/Chapter6/toy.cpp
index ebe4322287b21..cb7b6cc8651c1 100644
--- a/llvm/examples/Kaleidoscope/Chapter6/toy.cpp
+++ b/llvm/examples/Kaleidoscope/Chapter6/toy.cpp
@@ -1036,7 +1036,7 @@ static void HandleTopLevelExpression() {
 
       // Get the symbol's address and cast it to the right type (takes no
       // arguments, returns a double) so we can call it as a native function.
-      double (*FP)() = ExprSymbol.getAddress().toPtr<double (*)()>();
+      double (*FP)() = ExprSymbol.toPtr<double (*)()>();
       fprintf(stderr, "Evaluated to %f\n", FP());
 
       // Delete the anonymous expression module from the JIT.
diff --git a/llvm/examples/Kaleidoscope/Chapter7/toy.cpp b/llvm/examples/Kaleidoscope/Chapter7/toy.cpp
index 374f2c03b48e0..91b7191a07c6f 100644
--- a/llvm/examples/Kaleidoscope/Chapter7/toy.cpp
+++ b/llvm/examples/Kaleidoscope/Chapter7/toy.cpp
@@ -1207,7 +1207,7 @@ static void HandleTopLevelExpression() {
 
       // Get the symbol's address and cast it to the right type (takes no
       // arguments, returns a double) so we can call it as a native function.
-      double (*FP)() = ExprSymbol.getAddress().toPtr<double (*)()>();
+      double (*FP)() = ExprSymbol.toPtr<double (*)()>();
       fprintf(stderr, "Evaluated to %f\n", FP());
 
       // Delete the anonymous expression module from the JIT.
diff --git a/llvm/include/llvm/ADT/StringTable.h b/llvm/include/llvm/ADT/StringTable.h
index 4049f892fa66e..ce5efa1e06ea6 100644
--- a/llvm/include/llvm/ADT/StringTable.h
+++ b/llvm/include/llvm/ADT/StringTable.h
@@ -10,6 +10,8 @@
 #define LLVM_ADT_STRING_TABLE_H
 
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator.h"
+#include <iterator>
 #include <limits>
 
 namespace llvm {
@@ -51,6 +53,14 @@ class StringTable {
     constexpr Offset() = default;
     constexpr Offset(unsigned Value) : Value(Value) {}
 
+    friend constexpr bool operator==(const Offset &LHS, const Offset &RHS) {
+      return LHS.Value == RHS.Value;
+    }
+
+    friend constexpr bool operator!=(const Offset &LHS, const Offset &RHS) {
+      return LHS.Value != RHS.Value;
+    }
+
     constexpr unsigned value() const { return Value; }
   };
 
@@ -69,9 +79,13 @@ class StringTable {
     assert(!Table.empty() && "Requires at least a valid empty string.");
     assert(Table.data()[0] == '\0' && "Offset zero must be the empty string.");
     // Ensure that `strlen` from any offset cannot overflow the end of the table
-    // by insisting on a null byte at the end.
+    // by insisting on a null byte at the end. We also insist on the last string
+    // within the table being *separately* null terminated. This structure is
+    // used to enable predictable iteration over all the strings when needed.
     assert(Table.data()[Table.size() - 1] == '\0' &&
            "Last byte must be a null byte.");
+    assert(Table.data()[Table.size() - 2] == '\0' &&
+           "Next-to-last byte must be a null byte.");
   }
 
   // Get a string from the table starting with the provided offset. The returned
@@ -84,6 +98,43 @@ class StringTable {
 
   /// Returns the byte size of the table.
   constexpr size_t size() const { return Table.size(); }
+
+  class Iterator
+      : public iterator_facade_base<Iterator, std::forward_iterator_tag,
+                                    const StringRef> {
+    friend StringTable;
+
+    const StringTable *Table;
+    Offset O;
+
+    // A cache of one value to allow `*` to return a reference.
+    mutable StringRef S;
+
+    explicit constexpr Iterator(const StringTable &Table, Offset O)
+        : Table(&Table), O(O) {}
+
+  public:
+    constexpr Iterator(const Iterator &RHS) = default;
+    constexpr Iterator(Iterator &&RHS) = default;
+
+    bool operator==(const Iterator &RHS) const {
+      assert(Table == RHS.Table && "Compared iterators for unrelated tables!");
+      return O == RHS.O;
+    }
+
+    const StringRef &operator*() const {
+      S = (*Table)[O];
+      return S;
+    }
+
+    Iterator &operator++() {
+      O = O.value() + (*Table)[O].size() + 1;
+      return *this;
+    }
+  };
+
+  constexpr Iterator begin() const { return Iterator(*this, 0); }
+  constexpr Iterator end() const { return Iterator(*this, size() - 1); }
 };
 
 } // namespace llvm
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 94e36e412b0cf..9b78342c8fc39 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -840,8 +840,10 @@ class CombinerHelper {
   bool matchRedundantBinOpInEquality(MachineInstr &MI,
                                      BuildFnTy &MatchInfo) const;
 
-  /// Match shifts greater or equal to the bitwidth of the operation.
-  bool matchShiftsTooBig(MachineInstr &MI) const;
+  /// Match shifts greater or equal to the range (the bitwidth of the result
+  /// datatype, or the effective bitwidth of the source value).
+  bool matchShiftsTooBig(MachineInstr &MI,
+                         std::optional<int64_t> &MatchInfo) const;
 
   /// Match constant LHS ops that should be commuted.
   bool matchCommuteConstantToRHS(MachineInstr &MI) const;
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 7fe33c3913f2d..0b803a9724742 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -129,7 +129,7 @@ class MachineBasicBlock
   /// clearly as they both have an integer type.
   struct RegisterMaskPair {
   public:
-    MCPhysReg PhysReg;
+    MCRegister PhysReg;
     LaneBitmask LaneMask;
 
     RegisterMaskPair(MCPhysReg PhysReg, LaneBitmask LaneMask)
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 165af902e42d0..bc90364875b68 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1142,13 +1142,14 @@ class TargetInstrInfo : public MCInstrInfo {
   /// register, \p VReg is the register being assigned. This additional register
   /// argument is needed for certain targets when invoked from RegAllocFast to
   /// map the spilled physical register to its virtual register. A null register
-  /// can be passed elsewhere.
-  virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
-                                   MachineBasicBlock::iterator MI,
-                                   Register SrcReg, bool isKill, int FrameIndex,
-                                   const TargetRegisterClass *RC,
-                                   const TargetRegisterInfo *TRI,
-                                   Register VReg) const {
+  /// can be passed elsewhere. The \p Flags is used to set appropriate machine
+  /// flags on the spill instruction e.g. FrameSetup flag on a callee saved
+  /// register spill instruction, part of prologue, during the frame lowering.
+  virtual void storeRegToStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
+      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const {
     llvm_unreachable("Target didn't implement "
                      "TargetInstrInfo::storeRegToStackSlot!");
   }
@@ -1160,13 +1161,14 @@ class TargetInstrInfo : public MCInstrInfo {
   /// register, \p VReg is the register being assigned. This additional register
   /// argument is needed for certain targets when invoked from RegAllocFast to
   /// map the loaded physical register to its virtual register. A null register
-  /// can be passed elsewhere.
-  virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                                    MachineBasicBlock::iterator MI,
-                                    Register DestReg, int FrameIndex,
-                                    const TargetRegisterClass *RC,
-                                    const TargetRegisterInfo *TRI,
-                                    Register VReg) const {
+  /// can be passed elsewhere. The \p Flags is used to set appropriate machine
+  /// flags on the reload instruction e.g. FrameDestroy flag on a callee saved
+  /// register reload instruction, part of epilogue, during the frame lowering.
+  virtual void loadRegFromStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
+      int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const {
     llvm_unreachable("Target didn't implement "
                      "TargetInstrInfo::loadRegFromStackSlot!");
   }
diff --git a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index a2a9e5d499e52..3e2cd05387926 100644
--- a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -60,6 +60,8 @@ class TargetLoweringObjectFileELF : public TargetLoweringObjectFile {
                                         const MCSymbol *Sym,
                                         const MachineModuleInfo *MMI) const;
 
+  void emitLinkerDirectives(MCStreamer &Streamer, Module &M) const override;
+
   /// Given a constant with the SectionKind, return a section that it should be
   /// placed in.
   MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
@@ -131,6 +133,8 @@ class TargetLoweringObjectFileMachO : public TargetLoweringObjectFile {
   /// Emit the module flags that specify the garbage collection information.
   void emitModuleMetadata(MCStreamer &Streamer, Module &M) const override;
 
+  void emitLinkerDirectives(MCStreamer &Streamer, Module &M) const override;
+
   MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind,
                                     const TargetMachine &TM) const override;
 
@@ -192,6 +196,8 @@ class TargetLoweringObjectFileCOFF : public TargetLoweringObjectFile {
   /// Emit Obj-C garbage collection and linker options.
   void emitModuleMetadata(MCStreamer &Streamer, Module &M) const override;
 
+  void emitLinkerDirectives(MCStreamer &Streamer, Module &M) const override;
+
   MCSection *getStaticCtorSection(unsigned Priority,
                                   const MCSymbol *KeySym) const override;
   MCSection *getStaticDtorSection(unsigned Priority,
@@ -206,9 +212,6 @@ class TargetLoweringObjectFileCOFF : public TargetLoweringObjectFile {
   MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
                                    const Constant *C,
                                    Align &Alignment) const override;
-
-private:
-  void emitLinkerDirectives(MCStreamer &Streamer, Module &M) const;
 };
 
 class TargetLoweringObjectFileWasm : public TargetLoweringObjectFile {
diff --git a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
index 55f7322029d0f..b09364c74db04 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
@@ -49,6 +49,15 @@ struct CallSiteInfo {
   /// Bitwise OR of CallSiteInfo::Flags values
   uint8_t Flags = CallSiteInfo::Flags::None;
 
+  /// Equality comparison operator for CallSiteInfo.
+  bool operator==(const CallSiteInfo &RHS) const {
+    return ReturnOffset == RHS.ReturnOffset && MatchRegex == RHS.MatchRegex &&
+           Flags == RHS.Flags;
+  }
+
+  /// Inequality comparison operator for CallSiteInfo.
+  bool operator!=(const CallSiteInfo &RHS) const { return !(*this == RHS); }
+
   /// Decode a CallSiteInfo object from a binary data stream.
   ///
   /// \param Data The binary stream to read the data from.
diff --git a/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h b/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
index 9ccc96fbb4d5c..98483805d066c 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
@@ -49,6 +49,33 @@ struct LookupResult {
   /// deepest inline function will appear at index zero in the source locations
   /// array, and the concrete function will appear at the end of the array.
   SourceLocations Locations;
+
+  /// Function name regex patterns associated with a call site at the lookup
+  /// address. This vector will be populated when:
+  /// 1. The lookup address matches a call site's return address in a function
+  /// 2. The call site has associated regex patterns that describe what
+  /// functions can be called from that location
+  ///
+  /// The regex patterns can be used to validate function calls during runtime
+  /// checking or symbolication. For example:
+  /// - Patterns like "^foo$" indicate the call site can only call function
+  /// "foo"
+  /// - Patterns like "^std::" indicate the call site can call any function in
+  ///   the std namespace
+  /// - Multiple patterns allow matching against a set of allowed functions
+  ///
+  /// The patterns are stored as string references into the GSYM string table.
+  /// This information is typically loaded from:
+  /// - DWARF debug info call site entries
+  /// - External YAML files specifying call site patterns
+  /// - Other debug info formats that encode call site constraints
+  ///
+  /// The patterns will be empty if:
+  /// - The lookup address is not at the return address of a call site
+  /// - The call site has no associated function name constraints
+  /// - Call site info was not included when creating the GSYM file
+  std::vector<StringRef> CallSiteFuncRegex;
+
   std::string getSourceFile(uint32_t Index) const;
 };
 
@@ -59,6 +86,8 @@ inline bool operator==(const LookupResult &LHS, const LookupResult &RHS) {
     return false;
   if (LHS.FuncName != RHS.FuncName)
     return false;
+  if (LHS.CallSiteFuncRegex != RHS.CallSiteFuncRegex)
+    return false;
   return LHS.Locations == RHS.Locations;
 }
 
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
index bd82fadea1027..297e603164b24 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
@@ -754,6 +754,10 @@ class Section {
   /// Returns the ordinal for this section.
   SectionOrdinal getOrdinal() const { return SecOrdinal; }
 
+  /// Set the ordinal for this section. Ordinals are used to order the layout
+  /// of sections with the same permissions.
+  void setOrdinal(SectionOrdinal SecOrdinal) { this->SecOrdinal = SecOrdinal; }
+
   /// Returns true if this section is empty (contains no blocks or symbols).
   bool empty() const { return Blocks.empty(); }
 
@@ -1126,7 +1130,7 @@ class LinkGraph {
     return MutableArrayRef<char>(AllocatedBuffer, SourceStr.size() + 1);
   }
 
-  /// Create a section with the given name, protection flags, and alignment.
+  /// Create a section with the given name and protection flags.
   Section &createSection(StringRef Name, orc::MemProt Prot) {
     assert(!Sections.count(Name) && "Duplicate section name");
     std::unique_ptr<Section> Sec(new Section(Name, Prot, Sections.size()));
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/MachO.h b/llvm/include/llvm/ExecutionEngine/JITLink/MachO.h
index 2010b32cdf766..b3bf96b8549f2 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/MachO.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/MachO.h
@@ -55,6 +55,9 @@ inline Section &getMachODefaultTextSection(LinkGraph &G) {
                          orc::MemProt::Read | orc::MemProt::Exec);
 }
 
+/// Gets or creates a MachO header for the current LinkGraph.
+Expected<Symbol &> getOrCreateLocalMachOHeader(LinkGraph &G);
+
 } // end namespace jitlink
 } // end namespace llvm
 
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h b/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
index ebff2106e9d72..84904b1710945 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
@@ -244,8 +244,6 @@ class LLJIT {
 
   Error applyDataLayout(Module &M);
 
-  void recordCtorDtors(Module &M);
-
   std::unique_ptr<ExecutionSession> ES;
   std::unique_ptr<PlatformSupport> PS;
 
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h
index c9f7178ebcadb..6e99f6c03a7c6 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h
@@ -259,6 +259,7 @@ class MachOPlatform : public Platform {
 
     std::optional<UnwindSections> findUnwindSectionInfo(jitlink::LinkGraph &G);
     Error registerObjectPlatformSections(jitlink::LinkGraph &G, JITDylib &JD,
+                                         ExecutorAddr HeaderAddr,
                                          bool InBootstrapPhase);
 
     Error createObjCRuntimeObject(jitlink::LinkGraph &G);
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h
index 68ccdf83bd120..0756ab5ea9881 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h
@@ -23,6 +23,37 @@ namespace orc {
 /// Represents a defining location for a JIT symbol.
 class ExecutorSymbolDef {
 public:
+  /// Create an ExecutorSymbolDef from the given pointer.
+  /// Warning: This should only be used when JITing in-process.
+  template <typename T, typename UnwrapFn = ExecutorAddr::defaultUnwrap<T>>
+  static ExecutorSymbolDef fromPtr(T *Ptr,
+                                   JITSymbolFlags BaseFlags = JITSymbolFlags(),
+                                   UnwrapFn &&Unwrap = UnwrapFn()) {
+    auto *UP = Unwrap(Ptr);
+    JITSymbolFlags Flags = BaseFlags;
+    if (std::is_function_v<T>)
+      Flags |= JITSymbolFlags::Callable;
+    return ExecutorSymbolDef(
+        ExecutorAddr::fromPtr(UP, ExecutorAddr::rawPtr<T>()), Flags);
+  }
+
+  /// Cast this ExecutorSymbolDef to a pointer of the given type.
+  /// Warning: This should only be used when JITing in-process.
+  template <typename T, typename WrapFn =
+                            ExecutorAddr::defaultWrap<std::remove_pointer_t<T>>>
+  std::enable_if_t<std::is_pointer<T>::value, T>
+  toPtr(WrapFn &&Wrap = WrapFn()) const {
+    return Addr.toPtr<T>(std::forward<WrapFn>(Wrap));
+  }
+
+  /// Cast this ExecutorSymbolDef to a pointer of the given function type.
+  /// Warning: This should only be used when JITing in-process.
+  template <typename T, typename WrapFn = ExecutorAddr::defaultWrap<T>>
+  std::enable_if_t<std::is_function<T>::value, T *>
+  toPtr(WrapFn &&Wrap = WrapFn()) const {
+    return Addr.toPtr<T>(std::forward<WrapFn>(Wrap));
+  }
+
   ExecutorSymbolDef() = default;
   ExecutorSymbolDef(ExecutorAddr Addr, JITSymbolFlags Flags)
     : Addr(Addr), Flags(Flags) {}
diff --git a/llvm/include/llvm/Frontend/Offloading/Utility.h b/llvm/include/llvm/Frontend/Offloading/Utility.h
index abaea843848b2..f0bde5d81ef6d 100644
--- a/llvm/include/llvm/Frontend/Offloading/Utility.h
+++ b/llvm/include/llvm/Frontend/Offloading/Utility.h
@@ -21,6 +21,16 @@
 namespace llvm {
 namespace offloading {
 
+/// This is the record of an object that must be registered with the offloading
+/// runtime.
+struct EntryTy {
+  void *Address;
+  char *SymbolName;
+  size_t Size;
+  int32_t Flags;
+  int32_t Data;
+};
+
 /// Offloading entry flags for CUDA / HIP. The first three bits indicate the
 /// type of entry while the others are a bit field for additional information.
 enum OffloadEntryKindFlag : uint32_t {
@@ -48,15 +58,6 @@ StructType *getEntryTy(Module &M);
 /// Create an offloading section struct used to register this global at
 /// runtime.
 ///
-/// Type struct __tgt_offload_entry {
-///   void    *addr;      // Pointer to the offload entry info.
-///                       // (function or global)
-///   char    *name;      // Name of the function or global.
-///   size_t  size;       // Size of the entry info (0 if it a function).
-///   int32_t flags;
-///   int32_t data;
-/// };
-///
 /// \param M The module to be used
 /// \param Addr The pointer to the global being registered.
 /// \param Name The symbol name associated with the global.
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index b529642a55871..cc3584833202b 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2563,9 +2563,10 @@ def int_amdgcn_buffer_wbinvl1_vol :
 // VI Intrinsics
 //===----------------------------------------------------------------------===//
 
-// The llvm.amdgcn.mov.dpp.i32 intrinsic represents the mov.dpp operation in AMDGPU.
-// This operation is being deprecated and can be replaced with llvm.amdgcn.update.dpp.i32.
-// llvm.amdgcn.mov.dpp.i32 <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
+// The llvm.amdgcn.mov.dpp intrinsic represents the mov.dpp operation in AMDGPU.
+// This operation is being deprecated and can be replaced with
+// llvm.amdgcn.update.dpp.
+// llvm.amdgcn.mov.dpp <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
 def int_amdgcn_mov_dpp :
   Intrinsic<[llvm_anyint_ty],
             [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
@@ -2574,11 +2575,12 @@ def int_amdgcn_mov_dpp :
              ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>,
              ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree]>;
 
-// The llvm.amdgcn.update.dpp.i32 intrinsic represents the update.dpp operation in AMDGPU.
-// It takes an old value, a source operand, a DPP control operand, a row mask, a bank mask, and a bound control.
-// This operation is equivalent to a sequence of v_mov_b32 operations.
-// It is preferred over llvm.amdgcn.mov.dpp.i32 for future use.
-// llvm.amdgcn.update.dpp.i32 <old> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
+// The llvm.amdgcn.update.dpp intrinsic represents the update.dpp operation in
+// AMDGPU. It takes an old value, a source operand, a DPP control operand, a row
+// mask, a bank mask, and a bound control. This operation is equivalent to a
+// sequence of v_mov_b32 operations. It is preferred over llvm.amdgcn.mov.dpp
+// for future use.
+// llvm.amdgcn.update.dpp <old> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
 // Should be equivalent to:
 // v_mov_b32 <dest> <old>
 // v_mov_b32 <dest> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
@@ -2662,7 +2664,7 @@ def int_amdgcn_permlanex16 :
             [IntrNoMem, IntrConvergent, IntrWillReturn,
              ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>, IntrNoCallback, IntrNoFree]>;
 
-// llvm.amdgcn.mov.dpp8.i32 <src> <sel>
+// llvm.amdgcn.mov.dpp8 <src> <sel>
 // <sel> is a 32-bit constant whose high 8 bits must be zero which selects
 // the lanes to read from.
 def int_amdgcn_mov_dpp8 :
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index fb12949e10c7e..4bac5cd61084a 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -7279,13 +7279,13 @@ let TargetPrefix = "x86" in {
 }
 
 let TargetPrefix = "x86" in {
-def int_x86_avx10_vminmaxnepbf16128 : ClangBuiltin<"__builtin_ia32_vminmaxnepbf16128">,
+def int_x86_avx10_vminmaxbf16128 : ClangBuiltin<"__builtin_ia32_vminmaxbf16128">,
         DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_i32_ty],
                   [IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_x86_avx10_vminmaxnepbf16256 : ClangBuiltin<"__builtin_ia32_vminmaxnepbf16256">,
+def int_x86_avx10_vminmaxbf16256 : ClangBuiltin<"__builtin_ia32_vminmaxbf16256">,
         DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_i32_ty],
                   [IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_x86_avx10_vminmaxnepbf16512 : ClangBuiltin<"__builtin_ia32_vminmaxnepbf16512">,
+def int_x86_avx10_vminmaxbf16512 : ClangBuiltin<"__builtin_ia32_vminmaxbf16512">,
         DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_i32_ty],
                   [IntrNoMem, ImmArg<ArgIndex<2>>]>;
 def int_x86_avx10_vminmaxpd128 : ClangBuiltin<"__builtin_ia32_vminmaxpd128">,
diff --git a/llvm/include/llvm/Passes/DroppedVariableStatsIR.h b/llvm/include/llvm/Passes/DroppedVariableStatsIR.h
index 99701e8c8e1c0..18847b5c1ead8 100644
--- a/llvm/include/llvm/Passes/DroppedVariableStatsIR.h
+++ b/llvm/include/llvm/Passes/DroppedVariableStatsIR.h
@@ -28,12 +28,12 @@ class DroppedVariableStatsIR : public DroppedVariableStats {
   DroppedVariableStatsIR(bool DroppedVarStatsEnabled)
       : llvm::DroppedVariableStats(DroppedVarStatsEnabled) {}
 
-  void runBeforePass(Any IR) {
+  void runBeforePass(StringRef P, Any IR) {
     setup();
     if (const auto *M = unwrapIR<Module>(IR))
-      return this->runOnModule(M, true);
+      return this->runOnModule(P, M, true);
     if (const auto *F = unwrapIR<Function>(IR))
-      return this->runOnFunction(F, true);
+      return this->runOnFunction(P, F, true);
   }
 
   void runAfterPass(StringRef P, Any IR) {
@@ -50,19 +50,19 @@ class DroppedVariableStatsIR : public DroppedVariableStats {
   const Function *Func;
 
   void runAfterPassFunction(StringRef PassID, const Function *F) {
-    runOnFunction(F, false);
+    runOnFunction(PassID, F, false);
     calculateDroppedVarStatsOnFunction(F, PassID, F->getName().str(),
                                        "Function");
   }
 
   void runAfterPassModule(StringRef PassID, const Module *M) {
-    runOnModule(M, false);
+    runOnModule(PassID, M, false);
     calculateDroppedVarStatsOnModule(M, PassID, M->getName().str(), "Module");
   }
   /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or
   /// after a pass has run to facilitate dropped variable calculation for an
   /// llvm::Function.
-  void runOnFunction(const Function *F, bool Before);
+  void runOnFunction(StringRef PassID, const Function *F, bool Before);
   /// Iterate over all Instructions in a Function and report any dropped debug
   /// information.
   void calculateDroppedVarStatsOnFunction(const Function *F, StringRef PassID,
@@ -71,7 +71,7 @@ class DroppedVariableStatsIR : public DroppedVariableStats {
   /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or
   /// after a pass has run to facilitate dropped variable calculation for an
   /// llvm::Module. Calls runOnFunction on every Function in the Module.
-  void runOnModule(const Module *M, bool Before);
+  void runOnModule(StringRef PassID, const Module *M, bool Before);
   /// Iterate over all Functions in a Module and report any dropped debug
   /// information. Will call calculateDroppedVarStatsOnFunction on every
   /// Function.
diff --git a/llvm/include/llvm/SandboxIR/Type.h b/llvm/include/llvm/SandboxIR/Type.h
index c7a8943632bae..ec32284dacd61 100644
--- a/llvm/include/llvm/SandboxIR/Type.h
+++ b/llvm/include/llvm/SandboxIR/Type.h
@@ -292,8 +292,6 @@ class PointerType : public Type {
 public:
   // TODO: add missing functions
 
-  // TODO: Remove non-opaque variant of sandboxir::PointerType::get
-  static PointerType *get(Type *ElementType, unsigned AddressSpace);
   static PointerType *get(Context &Ctx, unsigned AddressSpace);
 
   static bool classof(const Type *From) {
diff --git a/llvm/include/llvm/Support/GenericDomTree.h b/llvm/include/llvm/Support/GenericDomTree.h
index a4a680c97a079..d3f789dd00269 100644
--- a/llvm/include/llvm/Support/GenericDomTree.h
+++ b/llvm/include/llvm/Support/GenericDomTree.h
@@ -558,6 +558,22 @@ class DominatorTreeBase {
     return isPostDominator() && !A->getBlock();
   }
 
+  template <typename IteratorTy>
+  NodeT *findNearestCommonDominator(iterator_range<IteratorTy> Nodes) const {
+    assert(!Nodes.empty() && "Nodes list is empty!");
+
+    NodeT *NCD = *Nodes.begin();
+    for (NodeT *Node : llvm::drop_begin(Nodes)) {
+      NCD = findNearestCommonDominator(NCD, Node);
+
+      // Stop when the root is reached.
+      if (isVirtualRoot(getNode(NCD)))
+        return nullptr;
+    }
+
+    return NCD;
+  }
+
   //===--------------------------------------------------------------------===//
   // API to update (Post)DominatorTree information based on modifications to
   // the CFG...
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 8641eabbdd84c..3590ab221ad44 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -306,11 +306,23 @@ def ptr_add_immed_chain : GICombineRule<
          [{ return Helper.matchPtrAddImmedChain(*${d}, ${matchinfo}); }]),
   (apply [{ Helper.applyPtrAddImmedChain(*${d}, ${matchinfo}); }])>;
 
+def shift_const_op : GICombinePatFrag<
+  (outs root:$dst), (ins),
+  !foreach(op,
+           [G_SHL, G_ASHR, G_LSHR],
+           (pattern (op $dst, $shifted, $amt)))>;
+def shift_result_matchdata : GIDefMatchData<"std::optional<int64_t>">;
 def shifts_too_big : GICombineRule<
-  (defs root:$root),
-  (match (wip_match_opcode G_SHL, G_ASHR, G_LSHR):$root,
-         [{ return Helper.matchShiftsTooBig(*${root}); }]),
-  (apply [{ Helper.replaceInstWithUndef(*${root}); }])>;
+  (defs root:$root, shift_result_matchdata:$matchinfo),
+  (match (shift_const_op $root):$mi,
+         [{ return Helper.matchShiftsTooBig(*${mi}, ${matchinfo}); }]),
+  (apply [{
+    if (${matchinfo}) {
+      Helper.replaceInstWithConstant(*${mi}, *${matchinfo});
+    } else {
+      Helper.replaceInstWithUndef(*${mi});
+    }
+  }])>;
 
 // Fold shift (shift base x), y -> shift base, (x+y), if shifts are same
 def shift_immed_matchdata : GIDefMatchData<"RegisterImmPair">;
diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td
index 3e037affe1cfd..f568a64971f09 100644
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -100,7 +100,7 @@ class SubRegIndex<int size, int offset = 0> {
 
   // The size/offset information, parameterized by a HW mode.
   // If the HwModes provided for SubRegRanges does not include the DefaultMode,
-  // the/ Size and Offset fields below will be used for the default. Otherwise,
+  // the Size and Offset fields below will be used for the default. Otherwise,
   // the Size and Offset fields are ignored.
   SubRegRangeByHwMode SubRegRanges;
 
@@ -159,13 +159,13 @@ class RegAltNameIndex {
 def NoRegAltName : RegAltNameIndex;
 
 // Register - You should define one instance of this class for each register
-// in the target machine.  String n will become the "name" of the register.
+// in the target machine. String n will become the "name" of the register.
 class Register<string n, list<string> altNames = []> {
   string Namespace = "";
   string AsmName = n;
   list<string> AltNames = altNames;
 
-  // Aliases - A list of registers that this register overlaps with.  A read or
+  // Aliases - A list of registers that this register overlaps with. A read or
   // modification of this register can potentially read or modify the aliased
   // registers.
   list<Register> Aliases = [];
@@ -187,8 +187,8 @@ class Register<string n, list<string> altNames = []> {
 
   // DwarfNumbers - Numbers used internally by gcc/gdb to identify the register.
   // These values can be determined by locating the <target>.h file in the
-  // directory llvmgcc/gcc/config/<target>/ and looking for REGISTER_NAMES.  The
-  // order of these names correspond to the enumeration used by gcc.  A value of
+  // directory llvmgcc/gcc/config/<target>/ and looking for REGISTER_NAMES. The
+  // order of these names corresponds to the enumeration used by gcc. A value of
   // -1 indicates that the gcc number is undefined and -2 that register number
   // is invalid for this mode/flavour.
   list<int> DwarfNumbers = [];
@@ -209,7 +209,7 @@ class Register<string n, list<string> altNames = []> {
   list<int> CostPerUse = [0];
 
   // CoveredBySubRegs - When this bit is set, the value of this register is
-  // completely determined by the value of its sub-registers.  For example, the
+  // completely determined by the value of its sub-registers. For example, the
   // x86 register AX is covered by its sub-registers AL and AH, but EAX is not
   // covered by its sub-register AX.
   bit CoveredBySubRegs = false;
@@ -239,7 +239,7 @@ class RegisterWithSubRegs<string n, list<Register> subregs> : Register<n> {
 }
 
 // DAGOperand - An empty base class that unifies RegisterClass's and other forms
-// of Operand's that are legal as type qualifiers in DAG patterns.  This should
+// of Operand's that are legal as type qualifiers in DAG patterns. This should
 // only ever be used for defining multiclasses that are polymorphic over both
 // RegisterClass's and other Operand's.
 class DAGOperand {
@@ -249,7 +249,7 @@ class DAGOperand {
 
 // RegisterClass - Now that all of the registers are defined, and aliases
 // between registers are defined, specify which registers belong to which
-// register classes.  This also defines the default allocation order of
+// register classes. This also defines the default allocation order of
 // registers by register allocators.
 //
 class RegisterClass<string namespace, list<ValueType> regTypes, int alignment,
@@ -261,14 +261,14 @@ class RegisterClass<string namespace, list<ValueType> regTypes, int alignment,
   RegInfoByHwMode RegInfos;
 
   // RegType - Specify the list ValueType of the registers in this register
-  // class.  Note that all registers in a register class must have the same
-  // ValueTypes.  This is a list because some targets permit storing different
+  // class. Note that all registers in a register class must have the same
+  // ValueTypes. This is a list because some targets permit storing different
   // types in same register, for example vector values with 128-bit total size,
   // but different count/size of items, like SSE on x86.
   //
   list<ValueType> RegTypes = regTypes;
 
-  // Size - Specify the spill size in bits of the registers.  A default value of
+  // Size - Specify the spill size in bits of the registers. A default value of
   // zero lets tablegen pick an appropriate size.
   int Size = 0;
 
@@ -283,7 +283,7 @@ class RegisterClass<string namespace, list<ValueType> regTypes, int alignment,
   // value means copying is extremely expensive or impossible.
   int CopyCost = 1;
 
-  // MemberList - Specify which registers are in this class.  If the
+  // MemberList - Specify which registers are in this class. If the
   // allocation_order_* method are not specified, this also defines the order of
   // allocation used by the register allocator.
   //
@@ -295,7 +295,7 @@ class RegisterClass<string namespace, list<ValueType> regTypes, int alignment,
   RegAltNameIndex altNameIndex = idx;
 
   // isAllocatable - Specify that the register class can be used for virtual
-  // registers and register allocation.  Some register classes are only used to
+  // registers and register allocation. Some register classes are only used to
   // model instruction operand constraints, and should have isAllocatable = 0.
   bit isAllocatable = true;
 
@@ -356,9 +356,9 @@ class RegisterClass<string namespace, list<ValueType> regTypes, int alignment,
   bits<8> TSFlags = 0;
 
   // If set then consider this register class to be the base class for registers in
-  // its MemberList.  The base class for registers present in multiple base register
+  // its MemberList. The base class for registers present in multiple base register
   // classes will be resolved in the order defined by this value, with lower values
-  // taking precedence over higher ones.  Ties are resolved by enumeration order.
+  // taking precedence over higher ones. Ties are resolved by enumeration order.
   int BaseClassOrder = ?;
 }
 
@@ -377,7 +377,7 @@ class RegisterClass<string namespace, list<ValueType> regTypes, int alignment,
 // also in the second set.
 //
 // (sequence "R%u", 0, 15) -> [R0, R1, ..., R15]. Generate a sequence of
-// numbered registers.  Takes an optional 4th operand which is a stride to use
+// numbered registers. Takes an optional 4th operand which is a stride to use
 // when generating the sequence.
 //
 // (shl GPR, 4) - Remove the first N elements.
@@ -455,14 +455,14 @@ class RegisterCategory<list<RegisterClass> classes> {
 
 //===----------------------------------------------------------------------===//
 // DwarfRegNum - This class provides a mapping of the llvm register enumeration
-// to the register numbering used by gcc and gdb.  These values are used by a
+// to the register numbering used by gcc and gdb. These values are used by a
 // debug information writer to describe where values may be located during
 // execution.
 class DwarfRegNum<list<int> Numbers> {
   // DwarfNumbers - Numbers used internally by gcc/gdb to identify the register.
   // These values can be determined by locating the <target>.h file in the
-  // directory llvmgcc/gcc/config/<target>/ and looking for REGISTER_NAMES.  The
-  // order of these names correspond to the enumeration used by gcc.  A value of
+  // directory llvmgcc/gcc/config/<target>/ and looking for REGISTER_NAMES. The
+  // order of these names corresponds to the enumeration used by gcc. A value of
   // -1 indicates that the gcc number is undefined and -2 that register number
   // is invalid for this mode/flavour.
   list<int> DwarfNumbers = Numbers;
@@ -481,7 +481,7 @@ class DwarfRegAlias<Register reg> {
 //
 class SubtargetFeature<string n, string f, string v, string d,
                        list<SubtargetFeature> i = []> {
-  // Name - Feature name.  Used by command line (-mattr=) to determine the
+  // Name - Feature name. Used by command line (-mattr=) to determine the
   // appropriate target chip.
   //
   string Name = n;
@@ -500,7 +500,7 @@ class SubtargetFeature<string n, string f, string v, string d,
   //
   string Value = v;
 
-  // Desc - Feature description.  Used by command line (-mattr=) to display help
+  // Desc - Feature description. Used by command line (-mattr=) to display help
   // information.
   //
   string Desc = d;
@@ -818,7 +818,7 @@ class Predicate<string cond> {
   string CondString = cond;
 
   /// AssemblerMatcherPredicate - If this feature can be used by the assembler
-  /// matcher, this is true.  Targets should set this by inheriting their
+  /// matcher, this is true. Targets should set this by inheriting their
   /// feature from the AssemblerPredicate class in addition to Predicate.
   bit AssemblerMatcherPredicate = false;
 
@@ -906,7 +906,7 @@ def encoder;
 def decoder;
 
 /// PointerLikeRegClass - Values that are designed to have pointer width are
-/// derived from this.  TableGen treats the register class as having a symbolic
+/// derived from this. TableGen treats the register class as having a symbolic
 /// type that it doesn't know, and resolves the actual regclass to use by using
 /// the TargetRegisterInfo::getPointerRegClass() hook at codegen time.
 class PointerLikeRegClass<int Kind> {
@@ -992,7 +992,7 @@ def ImmAsmOperand : AsmOperandClass {
 }
 
 /// Operand Types - These provide the built-in operand types that may be used
-/// by a target.  Targets can optionally provide their own operand types as
+/// by a target. Targets can optionally provide their own operand types as
 /// needed, though this should not be needed for RISC targets.
 class Operand<ValueType ty> : DAGOperand {
   ValueType Type = ty;
@@ -1107,7 +1107,7 @@ class PredicateOp;
 
 /// OperandWithDefaultOps - This Operand class can be used as the parent class
 /// for an Operand that needs to be initialized with a default value if
-/// no value is supplied in a pattern.  This class can be used to simplify the
+/// no value is supplied in a pattern. This class can be used to simplify the
 /// pattern definitions for instructions that have target specific flags
 /// encoded as immediate operands.
 class OperandWithDefaultOps<ValueType ty, dag defaultops>
@@ -1116,7 +1116,7 @@ class OperandWithDefaultOps<ValueType ty, dag defaultops>
 }
 
 /// PredicateOperand - This can be used to define a predicate operand for an
-/// instruction.  OpTypes specifies the MIOperandInfo for the operand, and
+/// instruction. OpTypes specifies the MIOperandInfo for the operand, and
 /// AlwaysVal specifies the value of this predicate when set to "always
 /// execute".
 class PredicateOperand<ValueType ty, dag OpTypes, dag AlwaysVal>
@@ -1559,7 +1559,7 @@ include "llvm/Target/GenericOpcodes.td"
 //
 class AsmParser {
   // AsmParserClassName - This specifies the suffix to use for the asmparser
-  // class.  Generated AsmParser classes are always prefixed with the target
+  // class. Generated AsmParser classes are always prefixed with the target
   // name.
   string AsmParserClassName  = "AsmParser";
 
@@ -1632,7 +1632,7 @@ def DefaultAsmParser : AsmParser;
 // implemented by targets to describe such variants.
 //
 class AsmParserVariant {
-  // Variant - AsmParsers can be of multiple different variants.  Variants are
+  // Variant - AsmParsers can be of multiple different variants. Variants are
   // used to support targets that need to parse multiple formats for the
   // assembly language.
   int Variant = 0;
@@ -1690,8 +1690,8 @@ class TokenAlias<string From, string To> {
 }
 
 /// MnemonicAlias - This class allows targets to define assembler mnemonic
-/// aliases.  This should be used when all forms of one mnemonic are accepted
-/// with a different mnemonic.  For example, X86 allows:
+/// aliases. This should be used when all forms of one mnemonic are accepted
+/// with a different mnemonic. For example, X86 allows:
 ///   sal %al, 1    -> shl %al, 1
 ///   sal %ax, %cl  -> shl %ax, %cl
 ///   sal %eax, %cl -> shl %eax, %cl
@@ -1769,11 +1769,11 @@ class AsmWriter {
   // FIXME: Remove after all ports are updated.
   int PassSubtarget = 0;
 
-  // Variant - AsmWriters can be of multiple different variants.  Variants are
+  // Variant - AsmWriters can be of multiple different variants. Variants are
   // used to support targets that need to emit assembly code in ways that are
   // mostly the same for different targets, but have minor differences in
-  // syntax.  If the asmstring contains {|} characters in them, this integer
-  // will specify which alternative to use.  For example "{x|y|z}" with Variant
+  // syntax. If the asmstring contains {|} characters in them, this integer
+  // will specify which alternative to use. For example "{x|y|z}" with Variant
   // == 1, will expand to "y".
   int Variant = 0;
 }
@@ -1798,7 +1798,7 @@ class Target {
   list<AsmWriter> AssemblyWriters = [DefaultAsmWriter];
 
   // AllowRegisterRenaming - Controls whether this target allows
-  // post-register-allocation renaming of registers.  This is done by
+  // post-register-allocation renaming of registers. This is done by
   // setting hasExtraDefRegAllocReq and hasExtraSrcRegAllocReq to 1
   // for all opcodes if this flag is set to 0.
   int AllowRegisterRenaming = 0;
@@ -1806,12 +1806,12 @@ class Target {
 
 //===----------------------------------------------------------------------===//
 // Processor chip sets - These values represent each of the chip sets supported
-// by the scheduler.  Each Processor definition requires corresponding
+// by the scheduler. Each Processor definition requires corresponding
 // instruction itineraries.
 //
 class Processor<string n, ProcessorItineraries pi, list<SubtargetFeature> f,
                 list<SubtargetFeature> tunef = []> {
-  // Name - Chip set name.  Used by command line (-mcpu=) to determine the
+  // Name - Chip set name. Used by command line (-mcpu=) to determine the
   // appropriate target chip.
   //
   string Name = n;
@@ -1829,7 +1829,7 @@ class Processor<string n, ProcessorItineraries pi, list<SubtargetFeature> f,
 
   // TuneFeatures - list of features for tuning for this CPU. If the target
   // supports -mtune, this should contain the list of features used to make
-  // microarchitectural optimization decisions for a given processor.  While
+  // microarchitectural optimization decisions for a given processor. While
   // Features should contain the architectural features for the processor.
   list<SubtargetFeature> TuneFeatures = tunef;
 }
@@ -1869,7 +1869,7 @@ class InstrMapping {
   //
   // def Add: { let BaseOp = 'ADD'; let predSense = 'nopred' }
   // def Add_predtrue: { let BaseOp = 'ADD'; let predSense = 'true' }
-  // def Add_predfalse: { let BaseOp = 'ADD'; let predSense = 'false'  }
+  // def Add_predfalse: { let BaseOp = 'ADD'; let predSense = 'false' }
   list<string> RowFields = [];
 
   // List of fields/attributes that are same for all the instructions
diff --git a/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
index 4864ba843f488..b523d6a47b41a 100644
--- a/llvm/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
@@ -91,6 +91,9 @@ class TargetLoweringObjectFile : public MCObjectFileInfo {
   /// Emit Call Graph Profile metadata.
   void emitCGProfileMetadata(MCStreamer &Streamer, Module &M) const;
 
+  /// Process linker options metadata and emit platform-specific bits.
+  virtual void emitLinkerDirectives(MCStreamer &Streamer, Module &M) const {}
+
   /// Get the module-level metadata that the platform cares about.
   virtual void getModuleMetadata(Module &M) {}
 
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index bee0a4298c786..7f3c2be90d820 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -353,6 +353,8 @@ class SDNode<string opcode, SDTypeProfile typeprof,
   string SDClass = sdclass;
   let Properties = props;
   SDTypeProfile TypeProfile = typeprof;
+  bit IsStrictFP = false;
+  bits<32> TSFlags = 0;
 }
 
 // Special TableGen-recognized dag nodes
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 72fda911962ad..3c5cf1ebe6ba2 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -389,27 +389,21 @@ class LoopVectorizationLegality {
     return LAI->getDepChecker().getMaxSafeVectorWidthInBits();
   }
 
-  /// Returns true if the loop has an uncountable early exit, i.e. an
+  /// Returns true if the loop has exactly one uncountable early exit, i.e. an
   /// uncountable exit that isn't the latch block.
-  bool hasUncountableEarlyExit() const { return HasUncountableEarlyExit; }
+  bool hasUncountableEarlyExit() const {
+    return getUncountableEdge().has_value();
+  }
 
-  /// Returns the uncountable early exiting block.
+  /// Returns the uncountable early exiting block, if there is exactly one.
   BasicBlock *getUncountableEarlyExitingBlock() const {
-    if (!HasUncountableEarlyExit) {
-      assert(getUncountableExitingBlocks().empty() &&
-             "Expected no uncountable exiting blocks");
-      return nullptr;
-    }
-    assert(getUncountableExitingBlocks().size() == 1 &&
-           "Expected only a single uncountable exiting block");
-    return getUncountableExitingBlocks()[0];
+    return hasUncountableEarlyExit() ? getUncountableEdge()->first : nullptr;
   }
 
-  /// Returns the destination of an uncountable early exiting block.
+  /// Returns the destination of the uncountable early exiting block, if there
+  /// is exactly one.
   BasicBlock *getUncountableEarlyExitBlock() const {
-    assert(getUncountableExitBlocks().size() == 1 &&
-           "Expected only a single uncountable exit block");
-    return getUncountableExitBlocks()[0];
+    return hasUncountableEarlyExit() ? getUncountableEdge()->second : nullptr;
   }
 
   /// Returns true if vector representation of the instruction \p I
@@ -463,14 +457,11 @@ class LoopVectorizationLegality {
     return CountableExitingBlocks;
   }
 
-  /// Returns all the exiting blocks with an uncountable exit.
-  const SmallVector<BasicBlock *, 4> &getUncountableExitingBlocks() const {
-    return UncountableExitingBlocks;
-  }
-
-  /// Returns all the exit blocks from uncountable exiting blocks.
-  SmallVector<BasicBlock *, 4> getUncountableExitBlocks() const {
-    return UncountableExitBlocks;
+  /// Returns the loop edge to an uncountable exit, or std::nullopt if there
+  /// isn't a single such edge.
+  std::optional<std::pair<BasicBlock *, BasicBlock *>>
+  getUncountableEdge() const {
+    return UncountableEdge;
   }
 
 private:
@@ -654,18 +645,13 @@ class LoopVectorizationLegality {
   /// supported.
   bool StructVecCallFound = false;
 
-  /// Indicates whether this loop has an uncountable early exit, i.e. an
-  /// uncountable exiting block that is not the latch.
-  bool HasUncountableEarlyExit = false;
-
   /// Keep track of all the countable and uncountable exiting blocks if
   /// the exact backedge taken count is not computable.
   SmallVector<BasicBlock *, 4> CountableExitingBlocks;
-  SmallVector<BasicBlock *, 4> UncountableExitingBlocks;
 
-  /// Keep track of the destinations of all uncountable exits if the
-  /// exact backedge taken count is not computable.
-  SmallVector<BasicBlock *, 4> UncountableExitBlocks;
+  /// Keep track of the loop edge to an uncountable exit, comprising a pair
+  /// of (Exiting, Exit) blocks, if there is exactly one early exit.
+  std::optional<std::pair<BasicBlock *, BasicBlock *>> UncountableEdge;
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index 8906dc4e5cacf..8077c28f79a39 100644
--- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -1176,10 +1176,12 @@ void BranchProbabilityInfo::copyEdgeProbabilities(BasicBlock *Src,
 
 void BranchProbabilityInfo::swapSuccEdgesProbabilities(const BasicBlock *Src) {
   assert(Src->getTerminator()->getNumSuccessors() == 2);
-  if (!Probs.contains(std::make_pair(Src, 0)))
+  auto It0 = Probs.find(std::make_pair(Src, 0));
+  if (It0 == Probs.end())
     return; // No probability is set for edges from Src
-  assert(Probs.contains(std::make_pair(Src, 1)));
-  std::swap(Probs[std::make_pair(Src, 0)], Probs[std::make_pair(Src, 1)]);
+  auto It1 = Probs.find(std::make_pair(Src, 1));
+  assert(It1 != Probs.end());
+  std::swap(It0->second, It1->second);
 }
 
 raw_ostream &
diff --git a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
index 1a1e6f0117e2b..663be65b7b9a8 100644
--- a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
@@ -49,26 +49,32 @@ static bool isPossibleIndirectCallTarget(const Function *F) {
     const Value *FnOrCast = Users.pop_back_val();
     for (const Use &U : FnOrCast->uses()) {
       const User *FnUser = U.getUser();
-      if (isa<BlockAddress>(FnUser))
+      if (isa<BlockAddress>(FnUser)) {
+        // Block addresses are illegal to call.
         continue;
+      }
       if (const auto *Call = dyn_cast<CallBase>(FnUser)) {
-        if (!Call->isCallee(&U))
+        if ((!Call->isCallee(&U) || U.get() != F) &&
+            !Call->getFunction()->getName().ends_with("$exit_thunk")) {
+          // Passing a function pointer to a call may lead to an indirect
+          // call. As an exception, ignore ARM64EC exit thunks.
           return true;
+        }
       } else if (isa<Instruction>(FnUser)) {
         // Consider any other instruction to be an escape. This has some weird
         // consequences like no-op intrinsics being an escape or a store *to* a
         // function address being an escape.
         return true;
-      } else if (const auto *C = dyn_cast<Constant>(FnUser)) {
-        // If this is a constant pointer cast of the function, don't consider
-        // this escape. Analyze the uses of the cast as well. This ensures that
-        // direct calls with mismatched prototypes don't end up in the CFG
-        // table. Consider other constants, such as vtable initializers, to
-        // escape the function.
-        if (C->stripPointerCasts() == F)
-          Users.push_back(FnUser);
-        else
-          return true;
+      } else if (const auto *G = dyn_cast<GlobalValue>(FnUser)) {
+        // Ignore llvm.arm64ec.symbolmap; it doesn't lower to an actual address.
+        if (G->getName() == "llvm.arm64ec.symbolmap")
+          continue;
+        // Globals (for example, vtables) are escapes.
+        return true;
+      } else if (isa<Constant>(FnUser)) {
+        // Constants which aren't a global are intermediate values; recursively
+        // analyze the users to see if they actually escape.
+        Users.push_back(FnUser);
       }
     }
   }
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index bc1a65064a8c5..65476fa05a203 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -381,7 +381,7 @@ void BranchFolder::replaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
       // full registers:
       assert(P.LaneMask == LaneBitmask::getAll() &&
              "Can only handle full register.");
-      MCPhysReg Reg = P.PhysReg;
+      MCRegister Reg = P.PhysReg;
       if (!LiveRegs.available(*MRI, Reg))
         continue;
       DebugLoc DL;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 4e3aaf5da7198..b193d8bb0aa18 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6590,12 +6590,57 @@ bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI,
   return CmpInst::isEquality(Pred) && Y.isValid();
 }
 
-bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) const {
+/// Return the smallest shift amount that loses every significant source bit.
+/// \p Result gets the folded value (0 or -1), or std::nullopt if it is unknown.
+static std::optional<unsigned>
+getMinUselessShift(KnownBits ValueKB, unsigned Opcode,
+                   std::optional<int64_t> &Result) {
+  assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR ||
+          Opcode == TargetOpcode::G_ASHR) && "Expect G_SHL, G_LSHR or G_ASHR.");
+  unsigned KnownFillBits = 0; // Edge bits known to equal the shifted-in fill.
+  switch (Opcode) {
+  case TargetOpcode::G_SHL: // Zeros enter from the low end.
+    KnownFillBits = ValueKB.countMinTrailingZeros();
+    Result = 0;
+    break;
+  case TargetOpcode::G_LSHR: // Zeros enter from the high end.
+    KnownFillBits = ValueKB.countMinLeadingZeros();
+    Result = 0;
+    break;
+  case TargetOpcode::G_ASHR: // Copies of the sign bit enter from the high end.
+    if (ValueKB.isNonNegative()) {
+      KnownFillBits = ValueKB.countMinLeadingZeros();
+      Result = 0;
+    } else if (ValueKB.isNegative()) {
+      KnownFillBits = ValueKB.countMinLeadingOnes();
+      Result = -1;
+    } else {
+      // Sign unknown: no folded value; the full bit width is returned below.
+      Result = std::nullopt;
+    }
+    break;
+  default: // Excluded by the assert above; Result is left untouched.
+    break;
+  }
+  return ValueKB.getBitWidth() - KnownFillBits;
+}
+
+bool CombinerHelper::matchShiftsTooBig(
+    MachineInstr &MI, std::optional<int64_t> &MatchInfo) const {
+  Register ShiftVal = MI.getOperand(1).getReg();
   Register ShiftReg = MI.getOperand(2).getReg();
   LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
   auto IsShiftTooBig = [&](const Constant *C) {
     auto *CI = dyn_cast<ConstantInt>(C);
-    return CI && CI->uge(ResTy.getScalarSizeInBits());
+    if (!CI)
+      return false;
+    if (CI->uge(ResTy.getScalarSizeInBits())) {
+      MatchInfo = std::nullopt;
+      return true;
+    }
+    auto OptMaxUsefulShift = getMinUselessShift(KB->getKnownBits(ShiftVal),
+                                                MI.getOpcode(), MatchInfo);
+    return OptMaxUsefulShift && CI->uge(*OptMaxUsefulShift);
   };
   return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
 }
diff --git a/llvm/lib/CodeGen/LivePhysRegs.cpp b/llvm/lib/CodeGen/LivePhysRegs.cpp
index 96380d4084825..2ba17e46be5a6 100644
--- a/llvm/lib/CodeGen/LivePhysRegs.cpp
+++ b/llvm/lib/CodeGen/LivePhysRegs.cpp
@@ -154,7 +154,7 @@ bool LivePhysRegs::available(const MachineRegisterInfo &MRI,
 /// Add live-in registers of basic block \p MBB to \p LiveRegs.
 void LivePhysRegs::addBlockLiveIns(const MachineBasicBlock &MBB) {
   for (const auto &LI : MBB.liveins()) {
-    MCPhysReg Reg = LI.PhysReg;
+    MCRegister Reg = LI.PhysReg;
     LaneBitmask Mask = LI.LaneMask;
     MCSubRegIndexIterator S(Reg, TRI);
     assert(Mask.any() && "Invalid livein mask");
diff --git a/llvm/lib/CodeGen/LiveVariables.cpp b/llvm/lib/CodeGen/LiveVariables.cpp
index 55428ab7832de..00dae84b5840b 100644
--- a/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/llvm/lib/CodeGen/LiveVariables.cpp
@@ -576,7 +576,7 @@ void LiveVariables::runOnBlock(MachineBasicBlock *MBB, unsigned NumRegs) {
   // Mark live-in registers as live-in.
   SmallVector<Register, 4> Defs;
   for (const auto &LI : MBB->liveins()) {
-    assert(Register::isPhysicalRegister(LI.PhysReg) &&
+    assert(LI.PhysReg.isPhysical() &&
            "Cannot have a live-in virtual register!");
     HandlePhysRegDef(LI.PhysReg, nullptr, Defs);
   }
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 05bc4cf646f42..c61b8eb5e7b9c 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -3178,11 +3178,11 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
     // Conservative default.
     bool InWorkList = true;
     // Remove from the Chain and Chain Map
-    if (BlockToChain.count(RemBB)) {
-      BlockChain *Chain = BlockToChain[RemBB];
+    if (auto It = BlockToChain.find(RemBB); It != BlockToChain.end()) {
+      BlockChain *Chain = It->second;
       InWorkList = Chain->UnscheduledPredecessors == 0;
       Chain->remove(RemBB);
-      BlockToChain.erase(RemBB);
+      BlockToChain.erase(It);
     }
 
     // Handle the unplaced block iterator
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index 594ff5ac4c07f..d41b11307e7bc 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -894,7 +894,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
   regsLive.clear();
   if (MRI->tracksLiveness()) {
     for (const auto &LI : MBB->liveins()) {
-      if (!Register::isPhysicalRegister(LI.PhysReg)) {
+      if (!LI.PhysReg.isPhysical()) {
         report("MBB live-in list contains non-physical register", MBB);
         continue;
       }
@@ -3448,7 +3448,7 @@ void MachineVerifier::visitMachineFunctionAfter() {
   if (MRI->tracksLiveness())
     for (const auto &MBB : *MF)
       for (MachineBasicBlock::RegisterMaskPair P : MBB.liveins()) {
-        MCPhysReg LiveInReg = P.PhysReg;
+        MCRegister LiveInReg = P.PhysReg;
         bool hasAliases = MCRegAliasIterator(LiveInReg, TRI, false).isValid();
         if (hasAliases || isAllocatable(LiveInReg) || isReserved(LiveInReg))
           continue;
diff --git a/llvm/lib/CodeGen/RDFLiveness.cpp b/llvm/lib/CodeGen/RDFLiveness.cpp
index 7b4b7fdb3e76a..682d316a5bfac 100644
--- a/llvm/lib/CodeGen/RDFLiveness.cpp
+++ b/llvm/lib/CodeGen/RDFLiveness.cpp
@@ -895,7 +895,7 @@ void Liveness::computeLiveIns() {
 void Liveness::resetLiveIns() {
   for (auto &B : DFG.getMF()) {
     // Remove all live-ins.
-    std::vector<unsigned> T;
+    std::vector<MCRegister> T;
     for (const MachineBasicBlock::RegisterMaskPair &LI : B.liveins())
       T.push_back(LI.PhysReg);
     for (auto I : T)
@@ -917,7 +917,7 @@ void Liveness::resetKills(MachineBasicBlock *B) {
     for (auto I : B->liveins()) {
       MCSubRegIndexIterator S(I.PhysReg, &TRI);
       if (!S.isValid()) {
-        LV.set(I.PhysReg);
+        LV.set(I.PhysReg.id());
         continue;
       }
       do {
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index 3863ca80bb44e..e2309b65cf9a2 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -276,7 +276,7 @@ class RegAllocFastImpl {
   // Assign index for each instruction to quickly determine dominance.
   InstrPosIndexes PosIndexes;
 
-  void setPhysRegState(MCPhysReg PhysReg, unsigned NewState);
+  void setPhysRegState(MCRegister PhysReg, unsigned NewState);
   bool isPhysRegFree(MCPhysReg PhysReg) const;
 
   /// Mark a physreg as used in this instruction.
@@ -449,7 +449,7 @@ bool RegAllocFastImpl::shouldAllocateRegister(const Register Reg) const {
   return ShouldAllocateRegisterImpl(*TRI, *MRI, Reg);
 }
 
-void RegAllocFastImpl::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) {
+void RegAllocFastImpl::setPhysRegState(MCRegister PhysReg, unsigned NewState) {
   for (MCRegUnit Unit : TRI->regunits(PhysReg))
     RegUnitStates[Unit] = NewState;
 }
@@ -671,7 +671,7 @@ void RegAllocFastImpl::reloadAtBegin(MachineBasicBlock &MBB) {
     return;
 
   for (MachineBasicBlock::RegisterMaskPair P : MBB.liveins()) {
-    MCPhysReg Reg = P.PhysReg;
+    MCRegister Reg = P.PhysReg;
     // Set state to live-in. This possibly overrides mappings to virtual
     // registers but we don't care anymore at this point.
     setPhysRegState(Reg, regLiveIn);
@@ -688,7 +688,7 @@ void RegAllocFastImpl::reloadAtBegin(MachineBasicBlock &MBB) {
     if (PhysReg == 0 || LR.Error)
       continue;
 
-    MCRegister FirstUnit = *TRI->regunits(PhysReg).begin();
+    MCRegUnit FirstUnit = *TRI->regunits(PhysReg).begin();
     if (RegUnitStates[FirstUnit] == regLiveIn)
       continue;
 
@@ -758,7 +758,7 @@ bool RegAllocFastImpl::displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg) {
 void RegAllocFastImpl::freePhysReg(MCPhysReg PhysReg) {
   LLVM_DEBUG(dbgs() << "Freeing " << printReg(PhysReg, TRI) << ':');
 
-  MCRegister FirstUnit = *TRI->regunits(PhysReg).begin();
+  MCRegUnit FirstUnit = *TRI->regunits(PhysReg).begin();
   switch (unsigned VirtReg = RegUnitStates[FirstUnit]) {
   case regFree:
     LLVM_DEBUG(dbgs() << '\n');
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 49e5b7d9ef014..21d5e0a1b2953 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22807,15 +22807,15 @@ static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG,
       Vec->getNumValues() != 1)
     return SDValue();
 
+  // Targets may want to avoid this to prevent an expensive register transfer.
+  if (!TLI.shouldScalarizeBinop(Vec))
+    return SDValue();
+
   EVT ResVT = ExtElt->getValueType(0);
   if (Opc == ISD::SETCC &&
       (ResVT != Vec.getValueType().getVectorElementType() || LegalTypes))
     return SDValue();
 
-  // Targets may want to avoid this to prevent an expensive register transfer.
-  if (!TLI.shouldScalarizeBinop(Vec))
-    return SDValue();
-
   // Extracting an element of a vector constant is constant-folded, so this
   // transform is just replacing a vector op with a scalar op while moving the
   // extract.
@@ -22834,8 +22834,21 @@ static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG,
     EVT OpVT = Op0.getValueType().getVectorElementType();
     Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op0, Index);
     Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op1, Index);
-    return DAG.getSetCC(DL, ResVT, Op0, Op1,
-                        cast<CondCodeSDNode>(Vec->getOperand(2))->get());
+    SDValue NewVal = DAG.getSetCC(
+        DL, ResVT, Op0, Op1, cast<CondCodeSDNode>(Vec->getOperand(2))->get());
+    // We may need to sign- or zero-extend the result to match the same
+    // behaviour as the vector version of SETCC.
+    unsigned VecBoolContents = TLI.getBooleanContents(Vec.getValueType());
+    if (ResVT != MVT::i1 &&
+        VecBoolContents != TargetLowering::UndefinedBooleanContent &&
+        VecBoolContents != TLI.getBooleanContents(ResVT)) {
+      if (VecBoolContents == TargetLowering::ZeroOrNegativeOneBooleanContent)
+        NewVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ResVT, NewVal,
+                             DAG.getValueType(MVT::i1));
+      else
+        NewVal = DAG.getZeroExtendInReg(NewVal, DL, MVT::i1);
+    }
+    return NewVal;
   }
   Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op0, Index);
   Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op1, Index);
@@ -26373,9 +26386,17 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
       if (AllSame)
         return N0;
 
-      // Canonicalize any other splat as a build_vector.
+      // Canonicalize any other splat as a build_vector, but avoid defining any
+      // undefined elements in the mask.
       SDValue Splatted = V->getOperand(SplatIndex);
       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
+      EVT EltVT = Splatted.getValueType();
+
+      for (unsigned i = 0; i != NumElts; ++i) {
+        if (SVN->getMaskElt(i) < 0)
+          Ops[i] = DAG.getUNDEF(EltVT);
+      }
+
       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
 
       // We may have jumped through bitcasts, so the type of the
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 368800d8b46a8..5861a95c090b1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -10959,12 +10959,9 @@ void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
     // The high part is obtained by SRA'ing all but one of the bits of low
     // part.
     unsigned LoSize = VT.getFixedSizeInBits();
-    HiLHS = DAG.getNode(
-        ISD::SRA, dl, VT, LHS,
-        DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
-    HiRHS = DAG.getNode(
-        ISD::SRA, dl, VT, RHS,
-        DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
+    SDValue Shift = DAG.getShiftAmountConstant(LoSize - 1, VT, dl);
+    HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, Shift);
+    HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS, Shift);
   } else {
     HiLHS = DAG.getConstant(0, dl, VT);
     HiRHS = DAG.getConstant(0, dl, VT);
diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp
index 5029f45def226..fa57eb30fac43 100644
--- a/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -375,12 +375,7 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS,
 template <typename ListOfBBs, typename DominanceAnalysis>
 static MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs,
                                    DominanceAnalysis &Dom, bool Strict = true) {
-  MachineBasicBlock *IDom = &Block;
-  for (MachineBasicBlock *BB : BBs) {
-    IDom = Dom.findNearestCommonDominator(IDom, BB);
-    if (!IDom)
-      break;
-  }
+  MachineBasicBlock *IDom = Dom.findNearestCommonDominator(iterator_range(BBs));
   if (Strict && IDom == &Block)
     return nullptr;
   return IDom;
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index be243c0e74e9d..fbbd92a2e0ca4 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -306,21 +306,7 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
                                                      Module &M) const {
   auto &C = getContext();
 
-  if (NamedMDNode *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) {
-    auto *S = C.getELFSection(".linker-options", ELF::SHT_LLVM_LINKER_OPTIONS,
-                              ELF::SHF_EXCLUDE);
-
-    Streamer.switchSection(S);
-
-    for (const auto *Operand : LinkerOptions->operands()) {
-      if (cast<MDNode>(Operand)->getNumOperands() != 2)
-        report_fatal_error("invalid llvm.linker.options");
-      for (const auto &Option : cast<MDNode>(Operand)->operands()) {
-        Streamer.emitBytes(cast<MDString>(Option)->getString());
-        Streamer.emitInt8(0);
-      }
-    }
-  }
+  emitLinkerDirectives(Streamer, M);
 
   if (NamedMDNode *DependentLibraries = M.getNamedMetadata("llvm.dependent-libraries")) {
     auto *S = C.getELFSection(".deplibs", ELF::SHT_LLVM_DEPENDENT_LIBRARIES,
@@ -400,6 +386,26 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
   emitCGProfileMetadata(Streamer, M);
 }
 
+void TargetLoweringObjectFileELF::emitLinkerDirectives(MCStreamer &Streamer,
+                                                       Module &M) const {
+  auto &Context = getContext();
+  NamedMDNode *Options = M.getNamedMetadata("llvm.linker.options");
+  if (!Options)
+    return;
+  // Every entry must be a pair; each of its strings is emitted NUL-terminated.
+  Streamer.switchSection(Context.getELFSection(
+      ".linker-options", ELF::SHT_LLVM_LINKER_OPTIONS, ELF::SHF_EXCLUDE));
+  for (const auto *Operand : Options->operands()) {
+    const auto *Entry = cast<MDNode>(Operand);
+    if (Entry->getNumOperands() != 2)
+      report_fatal_error("invalid llvm.linker.options");
+    for (const auto &Piece : Entry->operands()) {
+      Streamer.emitBytes(cast<MDString>(Piece)->getString());
+      Streamer.emitInt8(0);
+    }
+  }
+}
+
 MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol(
     const GlobalValue *GV, const TargetMachine &TM,
     MachineModuleInfo *MMI) const {
@@ -788,29 +794,35 @@ getGlobalObjectInfo(const GlobalObject *GO, const TargetMachine &TM) {
   return {Group, IsComdat, Flags};
 }
 
-static MCSection *selectExplicitSectionGlobal(
-    const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM,
-    MCContext &Ctx, Mangler &Mang, unsigned &NextUniqueID,
-    bool Retain, bool ForceUnique) {
-  StringRef SectionName = GO->getSection();
-
+static StringRef handlePragmaClangSection(const GlobalObject *GO,
+                                          SectionKind Kind) {
   // Check if '#pragma clang section' name is applicable.
   // Note that pragma directive overrides -ffunction-section, -fdata-section
   // and so section name is exactly as user specified and not uniqued.
   const GlobalVariable *GV = dyn_cast<GlobalVariable>(GO);
   if (GV && GV->hasImplicitSection()) {
     auto Attrs = GV->getAttributes();
-    if (Attrs.hasAttribute("bss-section") && Kind.isBSS()) {
-      SectionName = Attrs.getAttribute("bss-section").getValueAsString();
-    } else if (Attrs.hasAttribute("rodata-section") && Kind.isReadOnly()) {
-      SectionName = Attrs.getAttribute("rodata-section").getValueAsString();
-    } else if (Attrs.hasAttribute("relro-section") && Kind.isReadOnlyWithRel()) {
-      SectionName = Attrs.getAttribute("relro-section").getValueAsString();
-    } else if (Attrs.hasAttribute("data-section") && Kind.isData()) {
-      SectionName = Attrs.getAttribute("data-section").getValueAsString();
-    }
+    if (Attrs.hasAttribute("bss-section") && Kind.isBSS())
+      return Attrs.getAttribute("bss-section").getValueAsString();
+    else if (Attrs.hasAttribute("rodata-section") && Kind.isReadOnly())
+      return Attrs.getAttribute("rodata-section").getValueAsString();
+    else if (Attrs.hasAttribute("relro-section") && Kind.isReadOnlyWithRel())
+      return Attrs.getAttribute("relro-section").getValueAsString();
+    else if (Attrs.hasAttribute("data-section") && Kind.isData())
+      return Attrs.getAttribute("data-section").getValueAsString();
   }
 
+  return GO->getSection();
+}
+
+static MCSection *selectExplicitSectionGlobal(const GlobalObject *GO,
+                                              SectionKind Kind,
+                                              const TargetMachine &TM,
+                                              MCContext &Ctx, Mangler &Mang,
+                                              unsigned &NextUniqueID,
+                                              bool Retain, bool ForceUnique) {
+  StringRef SectionName = handlePragmaClangSection(GO, Kind);
+
   // Infer section flags from the section name if we can.
   Kind = getELFKindForNamedSection(SectionName, Kind);
 
@@ -1238,14 +1250,7 @@ MCSection *TargetLoweringObjectFileMachO::getStaticDtorSection(
 void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer,
                                                        Module &M) const {
   // Emit the linker options if present.
-  if (auto *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) {
-    for (const auto *Option : LinkerOptions->operands()) {
-      SmallVector<std::string, 4> StrOptions;
-      for (const auto &Piece : cast<MDNode>(Option)->operands())
-        StrOptions.push_back(std::string(cast<MDString>(Piece)->getString()));
-      Streamer.emitLinkerOptions(StrOptions);
-    }
-  }
+  emitLinkerDirectives(Streamer, M);
 
   unsigned VersionVal = 0;
   unsigned ImageInfoFlags = 0;
@@ -1279,6 +1284,18 @@ void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer,
   Streamer.addBlankLine();
 }
 
+void TargetLoweringObjectFileMachO::emitLinkerDirectives(MCStreamer &Streamer,
+                                                         Module &M) const {
+  if (NamedMDNode *Options = M.getNamedMetadata("llvm.linker.options")) {
+    for (const auto *Entry : Options->operands()) {
+      SmallVector<std::string, 4> Pieces; // Strings making up this one entry.
+      for (const auto &Piece : cast<MDNode>(Entry)->operands())
+        Pieces.push_back(cast<MDString>(Piece)->getString().str());
+      Streamer.emitLinkerOptions(Pieces); // One call per metadata entry.
+    }
+  }
+}
+
 static void checkMachOComdat(const GlobalValue *GV) {
   const Comdat *C = GV->getComdat();
   if (!C)
@@ -1291,21 +1308,7 @@ static void checkMachOComdat(const GlobalValue *GV) {
 MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal(
     const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
 
-  StringRef SectionName = GO->getSection();
-
-  const GlobalVariable *GV = dyn_cast<GlobalVariable>(GO);
-  if (GV && GV->hasImplicitSection()) {
-    auto Attrs = GV->getAttributes();
-    if (Attrs.hasAttribute("bss-section") && Kind.isBSS()) {
-      SectionName = Attrs.getAttribute("bss-section").getValueAsString();
-    } else if (Attrs.hasAttribute("rodata-section") && Kind.isReadOnly()) {
-      SectionName = Attrs.getAttribute("rodata-section").getValueAsString();
-    } else if (Attrs.hasAttribute("relro-section") && Kind.isReadOnlyWithRel()) {
-      SectionName = Attrs.getAttribute("relro-section").getValueAsString();
-    } else if (Attrs.hasAttribute("data-section") && Kind.isData()) {
-      SectionName = Attrs.getAttribute("data-section").getValueAsString();
-    }
-  }
+  StringRef SectionName = handlePragmaClangSection(GO, Kind);
 
   // Parse the section specifier and create it if valid.
   StringRef Segment, Section;
@@ -1674,7 +1677,7 @@ static int getSelectionForCOFF(const GlobalValue *GV) {
 
 MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal(
     const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
-  StringRef Name = GO->getSection();
+  StringRef Name = handlePragmaClangSection(GO, Kind);
   if (Name == getInstrProfSectionName(IPSK_covmap, Triple::COFF,
                                       /*AddSegmentInfo=*/false) ||
       Name == getInstrProfSectionName(IPSK_covfun, Triple::COFF,
diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
index 785a8da64abe4..b6dcaeb323f59 100644
--- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
@@ -301,6 +301,24 @@ FunctionInfo::lookup(DataExtractor &Data, const GsymReader &GR,
         InlineInfoData = InfoData;
         break;
 
+      case InfoType::CallSiteInfo:
+        if (auto CSIC = CallSiteInfoCollection::decode(InfoData)) {
+          // Find matching call site based on relative offset
+          for (const auto &CS : CSIC->CallSites) {
+            // Check if the call site matches the lookup address
+            if (CS.ReturnOffset == Addr - FuncAddr) {
+              // Get regex patterns
+              for (uint32_t RegexOffset : CS.MatchRegex) {
+                LR.CallSiteFuncRegex.push_back(GR.getString(RegexOffset));
+              }
+              break;
+            }
+          }
+        } else {
+          return CSIC.takeError();
+        }
+        break;
+
       default:
         break;
     }
diff --git a/llvm/lib/DebugInfo/GSYM/LookupResult.cpp b/llvm/lib/DebugInfo/GSYM/LookupResult.cpp
index 0ac0be6fda8f6..f906284455ebd 100644
--- a/llvm/lib/DebugInfo/GSYM/LookupResult.cpp
+++ b/llvm/lib/DebugInfo/GSYM/LookupResult.cpp
@@ -68,6 +68,16 @@ raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const LookupResult &LR) {
     if (IsInlined)
       OS << " [inlined]";
   }
+
+  if (!LR.CallSiteFuncRegex.empty()) {
+    OS << "\n      CallSites: ";
+    for (size_t i = 0; i < LR.CallSiteFuncRegex.size(); ++i) {
+      if (i > 0)
+        OS << ", ";
+      OS << LR.CallSiteFuncRegex[i];
+    }
+  }
+
   OS << '\n';
   return OS;
 }
diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO.cpp
index b9cfb8fb8fa96..eeccc87f8dcc3 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachO.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/MachO.cpp
@@ -86,5 +86,67 @@ void link_MachO(std::unique_ptr<LinkGraph> G,
   }
 }
 
+template <typename MachOHeaderType>
+static Expected<Block &> createLocalHeaderBlock(LinkGraph &G, Section &Sec) {
+  auto &B = G.createMutableContentBlock(Sec, sizeof(MachOHeaderType),
+                                        orc::ExecutorAddr(), 8, 0, true);
+  MachOHeaderType Hdr;
+  Hdr.magic = G.getPointerSize() == 4 ? MachO::MH_MAGIC : MachO::MH_MAGIC_64;
+  if (auto CPUType = MachO::getCPUType(G.getTargetTriple()))
+    Hdr.cputype = *CPUType;
+  else
+    return CPUType.takeError();
+  if (auto CPUSubType = MachO::getCPUSubType(G.getTargetTriple()))
+    Hdr.cpusubtype = *CPUSubType;
+  else
+    return CPUSubType.takeError();
+  Hdr.filetype = MachO::MH_OBJECT;
+
+  if (G.getEndianness() != endianness::native)
+    MachO::swapStruct(Hdr);
+
+  memcpy(B.getAlreadyMutableContent().data(), &Hdr, sizeof(Hdr));
+
+  return B;
+}
+
+Expected<Symbol &> getOrCreateLocalMachOHeader(LinkGraph &G) {
+  StringRef LocalHeaderSectionName("__TEXT,__lcl_macho_hdr");
+  Section *Sec = G.findSectionByName(LocalHeaderSectionName);
+  if (Sec) {
+    assert(Sec->blocks_size() == 1 && "Unexpected number of blocks");
+    assert(Sec->symbols_size() == 1 && "Unexpected number of symbols");
+    auto &Sym = **Sec->symbols().begin();
+    assert(Sym.getOffset() == 0 && "Symbol not at start of header block");
+    return Sym;
+  }
+
+  // Create the local header section, move all other sections up in the
+  // section ordering to ensure that it's laid out first.
+  for (auto &Sec : G.sections())
+    Sec.setOrdinal(Sec.getOrdinal() + 1);
+
+  Sec = &G.createSection(LocalHeaderSectionName, orc::MemProt::Read);
+
+  Sec->setOrdinal(0);
+
+  Block *B = nullptr;
+  switch (G.getTargetTriple().getArch()) {
+  case Triple::aarch64:
+  case Triple::x86_64:
+    if (auto BOrErr = createLocalHeaderBlock<MachO::mach_header_64>(G, *Sec))
+      B = &*BOrErr;
+    else
+      return BOrErr.takeError();
+    break;
+  default:
+    return make_error<JITLinkError>("Cannot create local Mach-O header for " +
+                                    G.getName() + ": unsupported triple " +
+                                    G.getTargetTriple().str());
+  }
+
+  return G.addAnonymousSymbol(*B, 0, B->getSize(), false, false);
+}
+
 } // end namespace jitlink
 } // end namespace llvm
diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
index 76d5c1428ed64..3c0c90b62bc09 100644
--- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
@@ -1058,12 +1058,9 @@ LLJIT::LLJIT(LLJITBuilderState &S, Error &Err)
     }
   }
 
-  if (S.PrePlatformSetup) {
-    if (auto Err2 = S.PrePlatformSetup(*this)) {
-      Err = std::move(Err2);
+  if (S.PrePlatformSetup)
+    if ((Err = S.PrePlatformSetup(*this)))
       return;
-    }
-  }
 
   if (!S.SetUpPlatform)
     S.SetUpPlatform = setUpGenericLLVMIRPlatform;
diff --git a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
index f8f65ec3b4cf3..48d54190fafb6 100644
--- a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
@@ -794,14 +794,29 @@ void MachOPlatform::MachOPlatformPlugin::modifyPassConfig(
 
   bool InBootstrapPhase = false;
 
-  if (LLVM_UNLIKELY(&MR.getTargetJITDylib() == &MP.PlatformJD)) {
+  ExecutorAddr HeaderAddr;
+  {
     std::lock_guard<std::mutex> Lock(MP.PlatformMutex);
-    if (MP.Bootstrap) {
-      InBootstrapPhase = true;
-      ++MP.Bootstrap->ActiveGraphs;
+    if (LLVM_UNLIKELY(&MR.getTargetJITDylib() == &MP.PlatformJD)) {
+      if (MP.Bootstrap) {
+        InBootstrapPhase = true;
+        ++MP.Bootstrap->ActiveGraphs;
+      }
     }
+
+    // Get the dso-base address if available.
+    auto I = MP.JITDylibToHeaderAddr.find(&MR.getTargetJITDylib());
+    if (I != MP.JITDylibToHeaderAddr.end())
+      HeaderAddr = I->second;
   }
 
+  // Point the libunwind dso-base absolute symbol at the header for the
+  // JITDylib. This will prevent us from synthesizing a new header for
+  // every object.
+  if (HeaderAddr)
+    LG.addAbsoluteSymbol("__jitlink$libunwind_dso_base", HeaderAddr, 0,
+                         Linkage::Strong, Scope::Local, true);
+
   // If we're in the bootstrap phase then increment the active graphs.
   if (LLVM_UNLIKELY(InBootstrapPhase))
     Config.PostAllocationPasses.push_back([this](LinkGraph &G) {
@@ -857,10 +872,11 @@ void MachOPlatform::MachOPlatformPlugin::modifyPassConfig(
 
   // Add a pass to register the final addresses of any special sections in the
   // object with the runtime.
-  Config.PostAllocationPasses.push_back(
-      [this, &JD = MR.getTargetJITDylib(), InBootstrapPhase](LinkGraph &G) {
-        return registerObjectPlatformSections(G, JD, InBootstrapPhase);
-      });
+  Config.PostAllocationPasses.push_back([this, &JD = MR.getTargetJITDylib(),
+                                         HeaderAddr,
+                                         InBootstrapPhase](LinkGraph &G) {
+    return registerObjectPlatformSections(G, JD, HeaderAddr, InBootstrapPhase);
+  });
 
   // If we're in the bootstrap phase then steal allocation actions and then
   // decrement the active graphs.
@@ -1249,7 +1265,7 @@ MachOPlatform::MachOPlatformPlugin::findUnwindSectionInfo(
       SecRange.Start = std::min(SecRange.Start, R.Start);
       SecRange.End = std::max(SecRange.End, R.End);
       for (auto &E : B->edges()) {
-        if (!E.getTarget().isDefined())
+        if (E.getKind() != Edge::KeepAlive || !E.getTarget().isDefined())
           continue;
         auto &TargetBlock = E.getTarget().getBlock();
         auto &TargetSection = TargetBlock.getSection();
@@ -1307,7 +1323,8 @@ MachOPlatform::MachOPlatformPlugin::findUnwindSectionInfo(
 }
 
 Error MachOPlatform::MachOPlatformPlugin::registerObjectPlatformSections(
-    jitlink::LinkGraph &G, JITDylib &JD, bool InBootstrapPhase) {
+    jitlink::LinkGraph &G, JITDylib &JD, ExecutorAddr HeaderAddr,
+    bool InBootstrapPhase) {
 
   // Get a pointer to the thread data section if there is one. It will be used
   // below.
@@ -1378,22 +1395,13 @@ Error MachOPlatform::MachOPlatformPlugin::registerObjectPlatformSections(
         dbgs() << "  " << KV.first << ": " << KV.second << "\n";
     });
 
+    assert(HeaderAddr && "Null header registered for JD");
     using SPSRegisterObjectPlatformSectionsArgs = SPSArgList<
         SPSExecutorAddr,
         SPSOptional<SPSTuple<SPSSequence<SPSExecutorAddrRange>,
                              SPSExecutorAddrRange, SPSExecutorAddrRange>>,
         SPSSequence<SPSTuple<SPSString, SPSExecutorAddrRange>>>;
 
-    ExecutorAddr HeaderAddr;
-    {
-      std::lock_guard<std::mutex> Lock(MP.PlatformMutex);
-      auto I = MP.JITDylibToHeaderAddr.find(&JD);
-      assert(I != MP.JITDylibToHeaderAddr.end() &&
-             "No header registered for JD");
-      assert(I->second && "Null header registered for JD");
-      HeaderAddr = I->second;
-    }
-
     AllocActionCallPair AllocActions = {
         cantFail(
             WrapperFunctionCall::Create<SPSRegisterObjectPlatformSectionsArgs>(
diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp
index 933ede71746e0..9304e6c57a385 100644
--- a/llvm/lib/MCA/InstrBuilder.cpp
+++ b/llvm/lib/MCA/InstrBuilder.cpp
@@ -219,11 +219,10 @@ static void initializeUsedResources(InstrDesc &ID,
   });
 }
 
-static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
-                              const MCSchedClassDesc &SCDesc,
-                              const MCSubtargetInfo &STI,
-                              unsigned CallLatency) {
-  if (MCDesc.isCall()) {
+static void computeMaxLatency(InstrDesc &ID, const MCSchedClassDesc &SCDesc,
+                              const MCSubtargetInfo &STI, unsigned CallLatency,
+                              bool IsCall) {
+  if (IsCall) {
     // We cannot estimate how long this call will take.
     // Artificially set an arbitrarily high latency.
     ID.MaxLatency = CallLatency;
@@ -599,7 +598,8 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI,
   ID->NumMicroOps = SCDesc.NumMicroOps;
   ID->SchedClassID = SchedClassID;
 
-  if (MCDesc.isCall() && FirstCallInst) {
+  bool IsCall = MCIA->isCall(MCI);
+  if (IsCall && FirstCallInst) {
     // We don't correctly model calls.
     WithColor::warning() << "found a call in the input assembly sequence.\n";
     WithColor::note() << "call instructions are not correctly modeled. "
@@ -607,7 +607,7 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI,
     FirstCallInst = false;
   }
 
-  if (MCDesc.isReturn() && FirstReturnInst) {
+  if (MCIA->isReturn(MCI) && FirstReturnInst) {
     WithColor::warning() << "found a return instruction in the input"
                          << " assembly sequence.\n";
     WithColor::note() << "program counter updates are ignored.\n";
@@ -615,7 +615,7 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI,
   }
 
   initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
-  computeMaxLatency(*ID, MCDesc, SCDesc, STI, CallLatency);
+  computeMaxLatency(*ID, SCDesc, STI, CallLatency, IsCall);
 
   if (Error Err = verifyOperands(MCDesc, MCI))
     return std::move(Err);
diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp
index cc41bbe6bbde2..9ae76a71ede5e 100644
--- a/llvm/lib/ObjectYAML/ELFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp
@@ -536,15 +536,11 @@ void ELFState<ELFT>::writeELFHeader(raw_ostream &OS) {
 
 template <class ELFT>
 void ELFState<ELFT>::initProgramHeaders(std::vector<Elf_Phdr> &PHeaders) {
-  DenseMap<StringRef, ELFYAML::Fill *> NameToFill;
   DenseMap<StringRef, size_t> NameToIndex;
   for (size_t I = 0, E = Doc.Chunks.size(); I != E; ++I) {
-    if (auto S = dyn_cast<ELFYAML::Fill>(Doc.Chunks[I].get()))
-      NameToFill[S->Name] = S;
     NameToIndex[Doc.Chunks[I]->Name] = I + 1;
   }
 
-  std::vector<ELFYAML::Section *> Sections = Doc.getSections();
   for (size_t I = 0, E = Doc.ProgramHeaders.size(); I != E; ++I) {
     ELFYAML::ProgramHeader &YamlPhdr = Doc.ProgramHeaders[I];
     Elf_Phdr Phdr;
diff --git a/llvm/lib/Passes/DroppedVariableStatsIR.cpp b/llvm/lib/Passes/DroppedVariableStatsIR.cpp
index 496a47e71182e..e1c277e87efb3 100644
--- a/llvm/lib/Passes/DroppedVariableStatsIR.cpp
+++ b/llvm/lib/Passes/DroppedVariableStatsIR.cpp
@@ -15,7 +15,8 @@
 
 using namespace llvm;
 
-void DroppedVariableStatsIR::runOnFunction(const Function *F, bool Before) {
+void DroppedVariableStatsIR::runOnFunction(StringRef PassID, const Function *F,
+                                           bool Before) {
   auto &DebugVariables = DebugVariablesStack.back()[F];
   auto FuncName = F->getName();
   Func = F;
@@ -32,9 +33,11 @@ void DroppedVariableStatsIR::calculateDroppedVarStatsOnFunction(
                                 PassLevel, Func);
 }
 
-void DroppedVariableStatsIR::runOnModule(const Module *M, bool Before) {
-  for (auto &F : *M)
-    runOnFunction(&F, Before);
+void DroppedVariableStatsIR::runOnModule(StringRef PassID, const Module *M,
+                                         bool Before) {
+  for (auto &F : *M) {
+    runOnFunction(PassID, &F, Before);
+  }
 }
 
 void DroppedVariableStatsIR::calculateDroppedVarStatsOnModule(
@@ -51,7 +54,7 @@ void DroppedVariableStatsIR::registerCallbacks(
     return;
 
   PIC.registerBeforeNonSkippedPassCallback(
-      [this](StringRef P, Any IR) { return runBeforePass(IR); });
+      [this](StringRef P, Any IR) { return runBeforePass(P, IR); });
   PIC.registerAfterPassCallback(
       [this](StringRef P, Any IR, const PreservedAnalyses &PA) {
         return runAfterPass(P, IR);
diff --git a/llvm/lib/SandboxIR/Type.cpp b/llvm/lib/SandboxIR/Type.cpp
index 4734d51be2822..51128bf307e9f 100644
--- a/llvm/lib/SandboxIR/Type.cpp
+++ b/llvm/lib/SandboxIR/Type.cpp
@@ -45,10 +45,6 @@ void Type::dump() {
 }
 #endif
 
-PointerType *PointerType::get(Type *ElementType, unsigned AddressSpace) {
-  return get(ElementType->getContext(), AddressSpace);
-}
-
 PointerType *PointerType::get(Context &Ctx, unsigned AddressSpace) {
   return cast<PointerType>(
       Ctx.getType(llvm::PointerType::get(Ctx.LLVMCtx, AddressSpace)));
diff --git a/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp b/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
index e8a4d73c671c9..4d0d99bce258a 100644
--- a/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
+++ b/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -251,7 +251,7 @@ static bool supportLoadFromLiteral(const MachineInstr &MI) {
 /// Number of GPR registers traked by mapRegToGPRIndex()
 static const unsigned N_GPR_REGS = 31;
 /// Map register number to index from 0-30.
-static int mapRegToGPRIndex(MCPhysReg Reg) {
+static int mapRegToGPRIndex(MCRegister Reg) {
   static_assert(AArch64::X28 - AArch64::X0 + 3 == N_GPR_REGS, "Number of GPRs");
   static_assert(AArch64::W30 - AArch64::W0 + 1 == N_GPR_REGS, "Number of GPRs");
   if (AArch64::X0 <= Reg && Reg <= AArch64::X28)
diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index ffc2d27a57c93..0a91edb4c1661 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -859,8 +859,8 @@ def HasV8_6aOps : Architecture64<8, 6, "a", "v8.6a",
     FeatureEnhancedCounterVirtualization, FeatureMatMulInt8],
   !listconcat(HasV8_5aOps.DefaultExts, [FeatureBF16, FeatureMatMulInt8])>;
 def HasV8_7aOps : Architecture64<8, 7, "a", "v8.7a",
-  [HasV8_6aOps, FeatureXS, FeatureWFxT, FeatureHCX, FeatureSPE_EEF],
-  !listconcat(HasV8_6aOps.DefaultExts, [FeatureWFxT])>;
+  [HasV8_6aOps, FeatureXS, FeatureWFxT, FeatureHCX],
+  !listconcat(HasV8_6aOps.DefaultExts, [FeatureWFxT, FeatureSPE_EEF])>;
 def HasV8_8aOps : Architecture64<8, 8, "a", "v8.8a",
   [HasV8_7aOps, FeatureHBC, FeatureMOPS, FeatureNMI],
   !listconcat(HasV8_7aOps.DefaultExts, [FeatureMOPS, FeatureHBC])>;
@@ -875,17 +875,19 @@ def HasV9_0aOps : Architecture64<9, 0, "a", "v9a",
     FeatureSVE2])>;
 def HasV9_1aOps : Architecture64<9, 1, "a", "v9.1a",
   [HasV8_6aOps, HasV9_0aOps],
-  !listconcat(HasV9_0aOps.DefaultExts, [FeatureBF16, FeatureMatMulInt8, FeatureRME])>;
+  !listconcat(HasV9_0aOps.DefaultExts, HasV8_6aOps.DefaultExts,
+              [FeatureRME])>;
 def HasV9_2aOps : Architecture64<9, 2, "a", "v9.2a",
   [HasV8_7aOps, HasV9_1aOps],
-  !listconcat(HasV9_1aOps.DefaultExts, [FeatureMEC, FeatureWFxT])>;
+  !listconcat(HasV9_1aOps.DefaultExts, HasV8_7aOps.DefaultExts,
+              [FeatureMEC])>;
 def HasV9_3aOps : Architecture64<9, 3, "a", "v9.3a",
   [HasV8_8aOps, HasV9_2aOps],
-  !listconcat(HasV9_2aOps.DefaultExts, [FeatureMOPS, FeatureHBC])>;
+  !listconcat(HasV9_2aOps.DefaultExts, HasV8_8aOps.DefaultExts, [])>;
 def HasV9_4aOps : Architecture64<9, 4, "a", "v9.4a",
   [HasV8_9aOps, HasV9_3aOps],
-  !listconcat(HasV9_3aOps.DefaultExts, [FeatureSPECRES2, FeatureCSSC,
-    FeatureRASv2, FeatureSVE2p1])>;
+  !listconcat(HasV9_3aOps.DefaultExts, HasV8_9aOps.DefaultExts,
+              [FeatureSVE2p1])>;
 def HasV9_5aOps : Architecture64<9, 5, "a", "v9.5a",
   [HasV9_4aOps, FeatureCPA],
   !listconcat(HasV9_4aOps.DefaultExts, [FeatureCPA,  FeatureLUT, FeatureFAMINMAX])>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 6b8a7e9559e00..7ccd65b105220 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -5278,7 +5278,8 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                            Register SrcReg, bool isKill, int FI,
                                            const TargetRegisterClass *RC,
                                            const TargetRegisterInfo *TRI,
-                                           Register VReg) const {
+                                           Register VReg,
+                                           MachineInstr::MIFlag Flags) const {
   MachineFunction &MF = *MBB.getParent();
   MachineFrameInfo &MFI = MF.getFrameInfo();
 
@@ -5445,12 +5446,10 @@ static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
       .addMemOperand(MMO);
 }
 
-void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
-                                            MachineBasicBlock::iterator MBBI,
-                                            Register DestReg, int FI,
-                                            const TargetRegisterClass *RC,
-                                            const TargetRegisterInfo *TRI,
-                                            Register VReg) const {
+void AArch64InstrInfo::loadRegFromStackSlot(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
+    int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+    Register VReg, MachineInstr::MIFlag Flags) const {
   MachineFunction &MF = *MBB.getParent();
   MachineFrameInfo &MFI = MF.getFrameInfo();
   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index e37f70f7d985d..9a0034223ab9b 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -347,18 +347,17 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
                    bool KillSrc, bool RenamableDest = false,
                    bool RenamableSrc = false) const override;
 
-  void storeRegToStackSlot(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator MBBI, Register SrcReg,
-                           bool isKill, int FrameIndex,
-                           const TargetRegisterClass *RC,
-                           const TargetRegisterInfo *TRI,
-                           Register VReg) const override;
-
-  void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MBBI, Register DestReg,
-                            int FrameIndex, const TargetRegisterClass *RC,
-                            const TargetRegisterInfo *TRI,
-                            Register VReg) const override;
+  void storeRegToStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
+      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
+
+  void loadRegFromStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+      Register DestReg, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
   // This tells target independent code that it is okay to pass instructions
   // with subreg operands to foldMemoryOperandImpl.
diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td
index 0e3c4e8397f52..2de8d4637d372 100644
--- a/llvm/lib/Target/AArch64/AArch64Processors.td
+++ b/llvm/lib/Target/AArch64/AArch64Processors.td
@@ -929,7 +929,8 @@ def ProcessorFeatures {
                                     FeatureComplxNum, FeatureCRC, FeatureJS,
                                     FeatureLSE, FeaturePAuth, FeatureFPAC,
                                     FeatureRAS, FeatureRCPC, FeatureRDM,
-                                    FeatureDotProd, FeatureMatMulInt8];
+                                    FeatureDotProd, FeatureMatMulInt8,
+                                    FeatureSPE_EEF];
   list<SubtargetFeature> ExynosM3 = [HasV8_0aOps, FeatureCRC, FeatureSHA2, FeatureAES,
                                      FeaturePerfMon, FeatureNEON, FeatureFPARMv8];
   list<SubtargetFeature> ExynosM4 = [HasV8_2aOps, FeatureSHA2, FeatureAES, FeatureDotProd,
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 27c88a55919e6..6d5e2697160ab 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4277,8 +4277,8 @@ let Predicates = [HasSVE2p2_or_SME2p2] in {
   defm FCVTZS_ZPzZ : sve_fp_z2op_p_zd_d<0b0, "fcvtzs", "int_aarch64_sve_fcvtzs", AArch64fcvtzs_mt>;
   defm FCVTZU_ZPzZ : sve_fp_z2op_p_zd_d<0b1, "fcvtzu", "int_aarch64_sve_fcvtzu", AArch64fcvtzu_mt>;
   // Integer convert to floating-point, zeroing predicate
-  defm SCVTF_ZPzZ  : sve_fp_z2op_p_zd_c<0b0, "scvtf">;
-  defm UCVTF_ZPzZ  : sve_fp_z2op_p_zd_c<0b1, "ucvtf">;
+  defm SCVTF_ZPzZ  : sve_fp_z2op_p_zd_c<0b0, "scvtf", "int_aarch64_sve_scvtf", AArch64scvtf_mt>;
+  defm UCVTF_ZPzZ  : sve_fp_z2op_p_zd_c<0b1, "ucvtf", "int_aarch64_sve_ucvtf", AArch64ucvtf_mt>;
   // Signed integer base 2 logarithm of fp value, zeroing predicate
   defm FLOGB_ZPzZ : sve_fp_z2op_p_zd_d_flogb<"flogb">;
 
@@ -4314,11 +4314,11 @@ let Predicates = [HasSVE2p2_or_SME2p2] in {
   defm FSQRT_ZPZz  : sve_fp_z2op_p_zd_hsd<0b01101, "fsqrt">;
 
   // SVE2p2 integer unary arithmetic (bitwise), zeroing predicate
-  defm CLS_ZPzZ  : sve_int_un_pred_arit_bitwise_z<0b000, "cls">;
-  defm CLZ_ZPzZ  : sve_int_un_pred_arit_bitwise_z<0b001, "clz">;
-  defm CNT_ZPzZ  : sve_int_un_pred_arit_bitwise_z<0b010, "cnt">;
-  defm CNOT_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b011, "cnot">;
-  defm NOT_ZPzZ  : sve_int_un_pred_arit_bitwise_z<0b110, "not">;
+  defm CLS_ZPzZ  : sve_int_un_pred_arit_bitwise_z<0b000,  "cls", AArch64cls_mt>;
+  defm CLZ_ZPzZ  : sve_int_un_pred_arit_bitwise_z<0b001,  "clz", AArch64clz_mt>;
+  defm CNT_ZPzZ  : sve_int_un_pred_arit_bitwise_z<0b010,  "cnt", AArch64cnt_mt>;
+  defm CNOT_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b011, "cnot", AArch64cnot_mt>;
+  defm NOT_ZPzZ  : sve_int_un_pred_arit_bitwise_z<0b110,  "not", AArch64not_mt>;
 
   // floating point
   defm FABS_ZPzZ : sve_int_un_pred_arit_bitwise_fp_z<0b100, "fabs", AArch64fabs_mt>;
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 3767b34bd5b0c..bc921f07e1dbf 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -358,7 +358,10 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
       CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
       IsLittle(LittleEndian), IsStreaming(IsStreaming),
       IsStreamingCompatible(IsStreamingCompatible),
-      StreamingHazardSize(AArch64StreamingHazardSize),
+      StreamingHazardSize(
+          AArch64StreamingHazardSize.getNumOccurrences() > 0
+              ? std::optional<unsigned>(AArch64StreamingHazardSize)
+              : std::nullopt),
       MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
       MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
       InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU, HasMinSize)),
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 7b1f316d048e5..d22991224d496 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -85,7 +85,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
 
   bool IsStreaming;
   bool IsStreamingCompatible;
-  unsigned StreamingHazardSize;
+  std::optional<unsigned> StreamingHazardSize;
   unsigned MinSVEVectorSizeInBits;
   unsigned MaxSVEVectorSizeInBits;
   unsigned VScaleForTuning = 1;
@@ -179,7 +179,10 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
 
   /// Returns the size of memory region that if accessed by both the CPU and
   /// the SME unit could result in a hazard. 0 = disabled.
-  unsigned getStreamingHazardSize() const { return StreamingHazardSize; }
+  unsigned getStreamingHazardSize() const {
+    return StreamingHazardSize.value_or(
+        !hasSMEFA64() && hasSME() && hasSVE() ? 1024 : 0);
+  }
 
   /// Returns true if the target has NEON and the function at runtime is known
   /// to have NEON enabled (e.g. the function is known not to be in streaming-SVE
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 873fbf7dd346b..2ee9910da5079 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -3306,7 +3306,7 @@ multiclass sve_fp_z2op_p_zd_d<bit U, string asm, string int_op, SDPatternOperato
   defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, ir_op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoD)>;
 }
 
-multiclass sve_fp_z2op_p_zd_c<bit U, string asm> {
+multiclass sve_fp_z2op_p_zd_c<bit U, string asm, string int_op, SDPatternOperator ir_op> {
   def _HtoH : sve_fp_z2op_p_zd<{ 0b011001, U }, asm, ZPR16, ZPR16>;
   def _StoH : sve_fp_z2op_p_zd<{ 0b011010, U }, asm, ZPR32, ZPR16>;
   def _StoS : sve_fp_z2op_p_zd<{ 0b101010, U }, asm, ZPR32, ZPR32>;
@@ -3314,6 +3314,15 @@ multiclass sve_fp_z2op_p_zd_c<bit U, string asm> {
   def _DtoS : sve_fp_z2op_p_zd<{ 0b111010, U }, asm, ZPR64, ZPR32>;
   def _DtoH : sve_fp_z2op_p_zd<{ 0b011011, U }, asm, ZPR64, ZPR16>;
   def _DtoD : sve_fp_z2op_p_zd<{ 0b111011, U }, asm, ZPR64, ZPR64>;
+
+  defm : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(int_op # _f32i64), nxv4f32, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _DtoS)>;
+  defm : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(int_op # _f64i32), nxv2f64, nxv2i1, nxv4i32, !cast<Instruction>(NAME # _StoD)>;
+  defm : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(int_op # _f16i32), nxv8f16, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _StoH)>;
+  defm : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(int_op # _f16i64), nxv8f16, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _DtoH)>;
+
+  defm : SVE_1_Op_PassthruUndefZero_Pat<nxv8f16, ir_op, nxv8i1,nxv8i16, !cast<Instruction>(NAME # _HtoH)>;
+  defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4f32, ir_op, nxv4i1,nxv4i32, !cast<Instruction>(NAME # _StoS)>;
+  defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2f64, ir_op, nxv2i1,nxv2i64, !cast<Instruction>(NAME # _DtoD)>;
 }
 
 multiclass sve_fp_z2op_p_zd_d_flogb<string asm> {
@@ -4966,11 +4975,16 @@ multiclass sve_int_un_pred_arit_bitwise<bits<3> opc, string asm,
   defm : SVE_1_Op_PassthruUndef_Pat<nxv2i64, op, nxv2i1,  nxv2i64, !cast<Pseudo>(NAME # _D_UNDEF)>;
 }
 
-multiclass sve_int_un_pred_arit_bitwise_z<bits<3> opc, string asm> {
+multiclass sve_int_un_pred_arit_bitwise_z<bits<3> opc, string asm, SDPatternOperator op> {
   def _B : sve_int_un_pred_arit_z<0b00, { opc, 0b1 }, asm, ZPR8>;
   def _H : sve_int_un_pred_arit_z<0b01, { opc, 0b1 }, asm, ZPR16>;
   def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b1 }, asm, ZPR32>;
   def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b1 }, asm, ZPR64>;
+
+  defm : SVE_1_Op_PassthruUndefZero_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+  defm : SVE_1_Op_PassthruUndefZero_Pat<nxv8i16, op, nxv8i1,  nxv8i16, !cast<Instruction>(NAME # _H)>;
+  defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, op, nxv4i1,  nxv4i32, !cast<Instruction>(NAME # _S)>;
+  defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, op, nxv2i1,  nxv2i64, !cast<Instruction>(NAME # _D)>;
 }
 
 multiclass sve_int_un_pred_arit_bitwise_fp<bits<3> opc, string asm,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 5d9a830f041a7..3d5a44a3623a0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -177,11 +177,11 @@ extern char &SIShrinkInstructionsLegacyID;
 void initializeSIFixSGPRCopiesLegacyPass(PassRegistry &);
 extern char &SIFixSGPRCopiesLegacyID;
 
-void initializeSIFixVGPRCopiesPass(PassRegistry &);
+void initializeSIFixVGPRCopiesLegacyPass(PassRegistry &);
 extern char &SIFixVGPRCopiesID;
 
-void initializeSILowerWWMCopiesPass(PassRegistry &);
-extern char &SILowerWWMCopiesID;
+void initializeSILowerWWMCopiesLegacyPass(PassRegistry &);
+extern char &SILowerWWMCopiesLegacyID;
 
 void initializeSILowerI1CopiesLegacyPass(PassRegistry &);
 extern char &SILowerI1CopiesLegacyID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 737b2f740d6f7..0c151d06924d8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -363,6 +363,7 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
   using RIK = MCResourceInfo::ResourceInfoKind;
   const GCNSubtarget &STM = TM.getSubtarget<GCNSubtarget>(F);
   MCSymbol *FnSym = TM.getSymbol(&F);
+  bool IsLocal = F.hasLocalLinkage();
 
   auto TryGetMCExprValue = [](const MCExpr *Value, uint64_t &Res) -> bool {
     int64_t Val;
@@ -375,8 +376,8 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
 
   const uint64_t MaxScratchPerWorkitem =
       STM.getMaxWaveScratchSize() / STM.getWavefrontSize();
-  MCSymbol *ScratchSizeSymbol =
-      RI.getSymbol(FnSym->getName(), RIK::RIK_PrivateSegSize, OutContext);
+  MCSymbol *ScratchSizeSymbol = RI.getSymbol(
+      FnSym->getName(), RIK::RIK_PrivateSegSize, OutContext, IsLocal);
   uint64_t ScratchSize;
   if (ScratchSizeSymbol->isVariable() &&
       TryGetMCExprValue(ScratchSizeSymbol->getVariableValue(), ScratchSize) &&
@@ -389,7 +390,7 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
   // Validate addressable scalar registers (i.e., prior to added implicit
   // SGPRs).
   MCSymbol *NumSGPRSymbol =
-      RI.getSymbol(FnSym->getName(), RIK::RIK_NumSGPR, OutContext);
+      RI.getSymbol(FnSym->getName(), RIK::RIK_NumSGPR, OutContext, IsLocal);
   if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
       !STM.hasSGPRInitBug()) {
     unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
@@ -406,9 +407,9 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
   }
 
   MCSymbol *VCCUsedSymbol =
-      RI.getSymbol(FnSym->getName(), RIK::RIK_UsesVCC, OutContext);
-  MCSymbol *FlatUsedSymbol =
-      RI.getSymbol(FnSym->getName(), RIK::RIK_UsesFlatScratch, OutContext);
+      RI.getSymbol(FnSym->getName(), RIK::RIK_UsesVCC, OutContext, IsLocal);
+  MCSymbol *FlatUsedSymbol = RI.getSymbol(
+      FnSym->getName(), RIK::RIK_UsesFlatScratch, OutContext, IsLocal);
   uint64_t VCCUsed, FlatUsed, NumSgpr;
 
   if (NumSGPRSymbol->isVariable() && VCCUsedSymbol->isVariable() &&
@@ -435,9 +436,9 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
     }
 
     MCSymbol *NumVgprSymbol =
-        RI.getSymbol(FnSym->getName(), RIK::RIK_NumVGPR, OutContext);
+        RI.getSymbol(FnSym->getName(), RIK::RIK_NumVGPR, OutContext, IsLocal);
     MCSymbol *NumAgprSymbol =
-        RI.getSymbol(FnSym->getName(), RIK::RIK_NumAGPR, OutContext);
+        RI.getSymbol(FnSym->getName(), RIK::RIK_NumAGPR, OutContext, IsLocal);
     uint64_t NumVgpr, NumAgpr;
 
     MachineModuleInfo &MMI =
@@ -655,6 +656,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
 
   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
   MCContext &Context = getObjFileLowering().getContext();
+  bool IsLocal = MF.getFunction().hasLocalLinkage();
   // FIXME: This should be an explicit check for Mesa.
   if (!STM.isAmdHsaOS() && !STM.isAmdPalOS()) {
     MCSectionELF *ConfigSection =
@@ -700,20 +702,24 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   {
     using RIK = MCResourceInfo::ResourceInfoKind;
     getTargetStreamer()->EmitMCResourceInfo(
-        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumVGPR, OutContext),
-        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumAGPR, OutContext),
-        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumSGPR, OutContext),
+        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumVGPR, OutContext,
+                     IsLocal),
+        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumAGPR, OutContext,
+                     IsLocal),
+        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumSGPR, OutContext,
+                     IsLocal),
         RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_PrivateSegSize,
-                     OutContext),
-        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_UsesVCC, OutContext),
+                     OutContext, IsLocal),
+        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_UsesVCC, OutContext,
+                     IsLocal),
         RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_UsesFlatScratch,
-                     OutContext),
+                     OutContext, IsLocal),
         RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasDynSizedStack,
-                     OutContext),
-        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasRecursion,
-                     OutContext),
+                     OutContext, IsLocal),
+        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasRecursion, OutContext,
+                     IsLocal),
         RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasIndirectCall,
-                     OutContext));
+                     OutContext, IsLocal));
   }
 
   if (isVerbose()) {
@@ -726,19 +732,21 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
       OutStreamer->emitRawComment(" Function info:", false);
 
       emitCommonFunctionComments(
-          RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumVGPR, OutContext)
+          RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumVGPR, OutContext,
+                       IsLocal)
               ->getVariableValue(),
-          STM.hasMAIInsts() ? RI.getSymbol(CurrentFnSym->getName(),
-                                           RIK::RIK_NumAGPR, OutContext)
-                                  ->getVariableValue()
-                            : nullptr,
+          STM.hasMAIInsts()
+              ? RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumAGPR,
+                             OutContext, IsLocal)
+                    ->getVariableValue()
+              : nullptr,
           RI.createTotalNumVGPRs(MF, Ctx),
           RI.createTotalNumSGPRs(
               MF,
               MF.getSubtarget<GCNSubtarget>().getTargetID().isXnackOnOrAny(),
               Ctx),
           RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_PrivateSegSize,
-                       OutContext)
+                       OutContext, IsLocal)
               ->getVariableValue(),
           getFunctionCodeSize(MF), MFI);
       return false;
@@ -927,6 +935,7 @@ static const MCExpr *computeAccumOffset(const MCExpr *NumVGPR, MCContext &Ctx) {
 void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
                                         const MachineFunction &MF) {
   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
+  bool IsLocal = MF.getFunction().hasLocalLinkage();
   MCContext &Ctx = MF.getContext();
 
   auto CreateExpr = [&Ctx](int64_t Value) {
@@ -944,7 +953,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
 
   auto GetSymRefExpr =
       [&](MCResourceInfo::ResourceInfoKind RIK) -> const MCExpr * {
-    MCSymbol *Sym = RI.getSymbol(CurrentFnSym->getName(), RIK, OutContext);
+    MCSymbol *Sym =
+        RI.getSymbol(CurrentFnSym->getName(), RIK, OutContext, IsLocal);
     return MCSymbolRefExpr::create(Sym, Ctx);
   };
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
index 9511b6bb7de06..47679f89f3f02 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
@@ -15,6 +15,7 @@
 #include "AMDGPUMCResourceInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/TargetMachine.h"
@@ -22,9 +23,12 @@
 using namespace llvm;
 
 MCSymbol *MCResourceInfo::getSymbol(StringRef FuncName, ResourceInfoKind RIK,
-                                    MCContext &OutContext) {
-  auto GOCS = [FuncName, &OutContext](StringRef Suffix) {
-    return OutContext.getOrCreateSymbol(FuncName + Twine(Suffix));
+                                    MCContext &OutContext, bool IsLocal) {
+  auto GOCS = [FuncName, &OutContext, IsLocal](StringRef Suffix) {
+    StringRef Prefix =
+        IsLocal ? OutContext.getAsmInfo()->getPrivateGlobalPrefix() : "";
+    return OutContext.getOrCreateSymbol(Twine(Prefix) + FuncName +
+                                        Twine(Suffix));
   };
   switch (RIK) {
   case RIK_NumVGPR:
@@ -51,8 +55,8 @@ MCSymbol *MCResourceInfo::getSymbol(StringRef FuncName, ResourceInfoKind RIK,
 
 const MCExpr *MCResourceInfo::getSymRefExpr(StringRef FuncName,
                                             ResourceInfoKind RIK,
-                                            MCContext &Ctx) {
-  return MCSymbolRefExpr::create(getSymbol(FuncName, RIK, Ctx), Ctx);
+                                            MCContext &Ctx, bool IsLocal) {
+  return MCSymbolRefExpr::create(getSymbol(FuncName, RIK, Ctx, IsLocal), Ctx);
 }
 
 void MCResourceInfo::assignMaxRegs(MCContext &OutContext) {
@@ -96,11 +100,12 @@ void MCResourceInfo::assignResourceInfoExpr(
     const MachineFunction &MF, const SmallVectorImpl<const Function *> &Callees,
     MCContext &OutContext) {
   const TargetMachine &TM = MF.getTarget();
+  bool IsLocal = MF.getFunction().hasLocalLinkage();
   MCSymbol *FnSym = TM.getSymbol(&MF.getFunction());
   const MCConstantExpr *LocalConstExpr =
       MCConstantExpr::create(LocalValue, OutContext);
   const MCExpr *SymVal = LocalConstExpr;
-  MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext);
+  MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext, IsLocal);
   if (!Callees.empty()) {
     SmallVector<const MCExpr *, 8> ArgExprs;
     SmallPtrSet<const Function *, 8> Seen;
@@ -110,9 +115,10 @@ void MCResourceInfo::assignResourceInfoExpr(
       if (!Seen.insert(Callee).second)
         continue;
 
+      bool IsCalleeLocal = Callee->hasLocalLinkage();
       MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction());
       MCSymbol *CalleeValSym =
-          getSymbol(CalleeFnSym->getName(), RIK, OutContext);
+          getSymbol(CalleeFnSym->getName(), RIK, OutContext, IsCalleeLocal);
 
       // Avoid constructing recursive definitions by detecting whether `Sym` is
       // found transitively within any of its `CalleeValSym`.
@@ -155,6 +161,7 @@ void MCResourceInfo::gatherResourceInfo(
   MCSymbol *MaxVGPRSym = getMaxVGPRSymbol(OutContext);
   MCSymbol *MaxAGPRSym = getMaxAGPRSymbol(OutContext);
   MCSymbol *MaxSGPRSym = getMaxSGPRSymbol(OutContext);
+  bool IsLocal = MF.getFunction().hasLocalLinkage();
 
   if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())) {
     addMaxVGPRCandidate(FRI.NumVGPR);
@@ -172,7 +179,8 @@ void MCResourceInfo::gatherResourceInfo(
                              FRI.Callees, OutContext);
     } else {
       const MCExpr *SymRef = MCSymbolRefExpr::create(MaxSym, OutContext);
-      MCSymbol *LocalNumSym = getSymbol(FnSym->getName(), RIK, OutContext);
+      MCSymbol *LocalNumSym =
+          getSymbol(FnSym->getName(), RIK, OutContext, IsLocal);
       const MCExpr *MaxWithLocal = AMDGPUMCExpr::createMax(
           {MCConstantExpr::create(numRegs, OutContext), SymRef}, OutContext);
       LocalNumSym->setVariableValue(MaxWithLocal);
@@ -187,7 +195,8 @@ void MCResourceInfo::gatherResourceInfo(
     // The expression for private segment size should be: FRI.PrivateSegmentSize
     // + max(FRI.Callees, FRI.CalleeSegmentSize)
     SmallVector<const MCExpr *, 8> ArgExprs;
-    MCSymbol *Sym = getSymbol(FnSym->getName(), RIK_PrivateSegSize, OutContext);
+    MCSymbol *Sym =
+        getSymbol(FnSym->getName(), RIK_PrivateSegSize, OutContext, IsLocal);
     if (FRI.CalleeSegmentSize)
       ArgExprs.push_back(
           MCConstantExpr::create(FRI.CalleeSegmentSize, OutContext));
@@ -198,9 +207,11 @@ void MCResourceInfo::gatherResourceInfo(
       if (!Seen.insert(Callee).second)
         continue;
       if (!Callee->isDeclaration()) {
+        bool IsCalleeLocal = Callee->hasLocalLinkage();
         MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction());
         MCSymbol *CalleeValSym =
-            getSymbol(CalleeFnSym->getName(), RIK_PrivateSegSize, OutContext);
+            getSymbol(CalleeFnSym->getName(), RIK_PrivateSegSize, OutContext,
+                      IsCalleeLocal);
 
         // Avoid constructing recursive definitions by detecting whether `Sym`
         // is found transitively within any of its `CalleeValSym`.
@@ -223,7 +234,7 @@ void MCResourceInfo::gatherResourceInfo(
   }
 
   auto SetToLocal = [&](int64_t LocalValue, ResourceInfoKind RIK) {
-    MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext);
+    MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext, IsLocal);
     Sym->setVariableValue(MCConstantExpr::create(LocalValue, OutContext));
   };
 
@@ -255,9 +266,10 @@ const MCExpr *MCResourceInfo::createTotalNumVGPRs(const MachineFunction &MF,
                                                   MCContext &Ctx) {
   const TargetMachine &TM = MF.getTarget();
   MCSymbol *FnSym = TM.getSymbol(&MF.getFunction());
+  bool IsLocal = MF.getFunction().hasLocalLinkage();
   return AMDGPUMCExpr::createTotalNumVGPR(
-      getSymRefExpr(FnSym->getName(), RIK_NumAGPR, Ctx),
-      getSymRefExpr(FnSym->getName(), RIK_NumVGPR, Ctx), Ctx);
+      getSymRefExpr(FnSym->getName(), RIK_NumAGPR, Ctx, IsLocal),
+      getSymRefExpr(FnSym->getName(), RIK_NumVGPR, Ctx, IsLocal), Ctx);
 }
 
 const MCExpr *MCResourceInfo::createTotalNumSGPRs(const MachineFunction &MF,
@@ -265,11 +277,12 @@ const MCExpr *MCResourceInfo::createTotalNumSGPRs(const MachineFunction &MF,
                                                   MCContext &Ctx) {
   const TargetMachine &TM = MF.getTarget();
   MCSymbol *FnSym = TM.getSymbol(&MF.getFunction());
+  bool IsLocal = MF.getFunction().hasLocalLinkage();
   return MCBinaryExpr::createAdd(
-      getSymRefExpr(FnSym->getName(), RIK_NumSGPR, Ctx),
+      getSymRefExpr(FnSym->getName(), RIK_NumSGPR, Ctx, IsLocal),
       AMDGPUMCExpr::createExtraSGPRs(
-          getSymRefExpr(FnSym->getName(), RIK_UsesVCC, Ctx),
-          getSymRefExpr(FnSym->getName(), RIK_UsesFlatScratch, Ctx), hasXnack,
-          Ctx),
+          getSymRefExpr(FnSym->getName(), RIK_UsesVCC, Ctx, IsLocal),
+          getSymRefExpr(FnSym->getName(), RIK_UsesFlatScratch, Ctx, IsLocal),
+          hasXnack, Ctx),
       Ctx);
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.h
index 9dc34100e644e..a670878948c31 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.h
@@ -71,9 +71,9 @@ class MCResourceInfo {
   }
 
   MCSymbol *getSymbol(StringRef FuncName, ResourceInfoKind RIK,
-                      MCContext &OutContext);
+                      MCContext &OutContext, bool IsLocal);
   const MCExpr *getSymRefExpr(StringRef FuncName, ResourceInfoKind RIK,
-                              MCContext &Ctx);
+                              MCContext &Ctx, bool IsLocal);
 
   void reset();
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 09a39d23d801b..142b41f030635 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -99,11 +99,13 @@ FUNCTION_PASS_WITH_PARAMS(
 MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this))
 MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass())
 MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass())
+MACHINE_FUNCTION_PASS("si-fix-vgpr-copies", SIFixVGPRCopiesPass())
 MACHINE_FUNCTION_PASS("si-fold-operands", SIFoldOperandsPass());
 MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
 MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass())
 MACHINE_FUNCTION_PASS("si-lower-control-flow", SILowerControlFlowPass())
 MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass())
+MACHINE_FUNCTION_PASS("si-lower-wwm-copies", SILowerWWMCopiesPass())
 MACHINE_FUNCTION_PASS("si-opt-vgpr-liverange", SIOptimizeVGPRLiveRangePass())
 MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass())
 MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 53ec80b8f7204..ce0b10b804ba1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -36,10 +36,12 @@
 #include "R600.h"
 #include "R600TargetMachine.h"
 #include "SIFixSGPRCopies.h"
+#include "SIFixVGPRCopies.h"
 #include "SIFoldOperands.h"
 #include "SILoadStoreOptimizer.h"
 #include "SILowerControlFlow.h"
 #include "SILowerSGPRSpills.h"
+#include "SILowerWWMCopies.h"
 #include "SIMachineFunctionInfo.h"
 #include "SIMachineScheduler.h"
 #include "SIOptimizeExecMasking.h"
@@ -482,11 +484,11 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR);
   initializeAMDGPURegBankSelectPass(*PR);
   initializeAMDGPURegBankLegalizePass(*PR);
-  initializeSILowerWWMCopiesPass(*PR);
+  initializeSILowerWWMCopiesLegacyPass(*PR);
   initializeAMDGPUMarkLastScratchLoadPass(*PR);
   initializeSILowerSGPRSpillsLegacyPass(*PR);
   initializeSIFixSGPRCopiesLegacyPass(*PR);
-  initializeSIFixVGPRCopiesPass(*PR);
+  initializeSIFixVGPRCopiesLegacyPass(*PR);
   initializeSIFoldOperandsLegacyPass(*PR);
   initializeSIPeepholeSDWALegacyPass(*PR);
   initializeSIShrinkInstructionsLegacyPass(*PR);
@@ -1581,7 +1583,7 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() {
   // For allocating other wwm register operands.
   addPass(createWWMRegAllocPass(false));
 
-  addPass(&SILowerWWMCopiesID);
+  addPass(&SILowerWWMCopiesLegacyID);
   addPass(&AMDGPUReserveWWMRegsID);
 
   // For allocating per-thread VGPRs.
@@ -1617,7 +1619,7 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
 
   // For allocating other whole wave mode registers.
   addPass(createWWMRegAllocPass(true));
-  addPass(&SILowerWWMCopiesID);
+  addPass(&SILowerWWMCopiesLegacyID);
   addPass(createVirtRegRewriter(false));
   addPass(&AMDGPUReserveWWMRegsID);
 
@@ -2107,7 +2109,7 @@ void AMDGPUCodeGenPassBuilder::addMachineSSAOptimization(
 }
 
 void AMDGPUCodeGenPassBuilder::addPostRegAlloc(AddMachinePass &addPass) const {
-  // addPass(SIFixVGPRCopiesID);
+  addPass(SIFixVGPRCopiesPass());
   if (TM.getOptLevel() > CodeGenOptLevel::None)
     addPass(SIOptimizeExecMaskingPass());
   Base::addPostRegAlloc(addPass);
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 6baef137df5e1..873d18e30a430 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -858,9 +858,12 @@ int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) {
   }
 
   if (TII->isFLAT(MI)) {
-    int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
-    if (AMDGPU::getRegBitWidth(Desc.operands()[DataIdx].RegClass) > 64)
-      return DataIdx;
+    // There is no hazard if the instruction does not use vector regs
+    if (VDataIdx == -1)
+      return -1;
+
+    if (AMDGPU::getRegBitWidth(VDataRCID) > 64)
+      return VDataIdx;
   }
 
   return -1;
diff --git a/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp
index 08272a9ddfd30..d0d679221eee0 100644
--- a/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp
@@ -11,6 +11,7 @@
 ///
 //===----------------------------------------------------------------------===//
 
+#include "SIFixVGPRCopies.h"
 #include "AMDGPU.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -22,13 +23,12 @@ using namespace llvm;
 
 namespace {
 
-class SIFixVGPRCopies : public MachineFunctionPass {
+class SIFixVGPRCopiesLegacy : public MachineFunctionPass {
 public:
   static char ID;
 
-public:
-  SIFixVGPRCopies() : MachineFunctionPass(ID) {
-    initializeSIFixVGPRCopiesPass(*PassRegistry::getPassRegistry());
+  SIFixVGPRCopiesLegacy() : MachineFunctionPass(ID) {
+    initializeSIFixVGPRCopiesLegacyPass(*PassRegistry::getPassRegistry());
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -41,15 +41,31 @@ class SIFixVGPRCopies : public MachineFunctionPass {
   StringRef getPassName() const override { return "SI Fix VGPR copies"; }
 };
 
+// Pass-manager-independent implementation shared by the legacy and new passes.
+struct SIFixVGPRCopies {
+  bool run(MachineFunction &MF);
+};
+
 } // End anonymous namespace.
 
-INITIALIZE_PASS(SIFixVGPRCopies, DEBUG_TYPE, "SI Fix VGPR copies", false, false)
+INITIALIZE_PASS(SIFixVGPRCopiesLegacy, DEBUG_TYPE, "SI Fix VGPR copies", false,
+                false)
 
-char SIFixVGPRCopies::ID = 0;
+char SIFixVGPRCopiesLegacy::ID = 0;
 
-char &llvm::SIFixVGPRCopiesID = SIFixVGPRCopies::ID;
+char &llvm::SIFixVGPRCopiesID = SIFixVGPRCopiesLegacy::ID;
+
+PreservedAnalyses SIFixVGPRCopiesPass::run(MachineFunction &MF,
+                                           MachineFunctionAnalysisManager &) {
+  SIFixVGPRCopies().run(MF); // Result unused: all analyses are reported kept.
+  return PreservedAnalyses::all();
+}
+
+bool SIFixVGPRCopiesLegacy::runOnMachineFunction(MachineFunction &MF) {
+  return SIFixVGPRCopies().run(MF); // Delegate to the shared implementation.
+}
 
-bool SIFixVGPRCopies::runOnMachineFunction(MachineFunction &MF) {
+bool SIFixVGPRCopies::run(MachineFunction &MF) {
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
   const SIInstrInfo *TII = ST.getInstrInfo();
diff --git a/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.h b/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.h
new file mode 100644
index 0000000000000..7b098b71597ff
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.h
@@ -0,0 +1,22 @@
+//===- SIFixVGPRCopies.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_SIFIXVGPRCOPIES_H
+#define LLVM_LIB_TARGET_AMDGPU_SIFIXVGPRCOPIES_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+class SIFixVGPRCopiesPass : public PassInfoMixin<SIFixVGPRCopiesPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_SIFIXVGPRCOPIES_H
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 8fc32d9e60bf2..1d98d68a2ea5d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1724,7 +1724,8 @@ static unsigned getVectorRegSpillSaveOpcode(Register Reg,
 void SIInstrInfo::storeRegToStackSlot(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
     bool isKill, int FrameIndex, const TargetRegisterClass *RC,
-    const TargetRegisterInfo *TRI, Register VReg) const {
+    const TargetRegisterInfo *TRI, Register VReg,
+    MachineInstr::MIFlag Flags) const {
   MachineFunction *MF = MBB.getParent();
   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
@@ -1951,7 +1952,8 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                        Register DestReg, int FrameIndex,
                                        const TargetRegisterClass *RC,
                                        const TargetRegisterInfo *TRI,
-                                       Register VReg) const {
+                                       Register VReg,
+                                       MachineInstr::MIFlag Flags) const {
   MachineFunction *MF = MBB.getParent();
   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
@@ -2749,6 +2751,63 @@ static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI,
   return &MI;
 }
 
+// Exchange the immediate values and target flags of two immediate operands.
+static MachineInstr *swapImmOperands(MachineInstr &MI,
+                                     MachineOperand &NonRegOp1,
+                                     MachineOperand &NonRegOp2) {
+  const int64_t SavedImm = NonRegOp1.getImm();
+  const unsigned SavedFlags = NonRegOp1.getTargetFlags();
+  NonRegOp1.setImm(NonRegOp2.getImm());
+  NonRegOp1.setTargetFlags(NonRegOp2.getTargetFlags());
+  NonRegOp2.setImm(SavedImm);
+  NonRegOp2.setTargetFlags(SavedFlags);
+  return &MI;
+}
+
+// Return true if operands OpIdx0 (MO0) and OpIdx1 (MO1) of MI may be swapped.
+bool SIInstrInfo::isLegalToSwap(const MachineInstr &MI, unsigned OpIdx0,
+                                const MachineOperand *MO0, unsigned OpIdx1,
+                                const MachineOperand *MO1) const {
+  const MCInstrDesc &InstDesc = MI.getDesc();
+  const MCOperandInfo &OpInfo0 = InstDesc.operands()[OpIdx0];
+  const MCOperandInfo &OpInfo1 = InstDesc.operands()[OpIdx1];
+  const TargetRegisterClass *DefinedRC1 =
+      OpInfo1.RegClass != -1 ? RI.getRegClass(OpInfo1.RegClass) : nullptr;
+  const TargetRegisterClass *DefinedRC0 =
+      OpInfo0.RegClass != -1 ? RI.getRegClass(OpInfo0.RegClass) : nullptr;
+
+  unsigned Opc = MI.getOpcode();
+  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
+
+  // Swapping cannot breach constant bus or literal limits, but for VALU it may
+  // move a literal out of src0, which is not allowed pre-gfx10; most test
+  // cases also expect VOP literals in src0, so reject such swaps everywhere.
+  // FIXME: After gfx9, a literal can be placed in operands other than src0.
+  if (isVALU(MI)) {
+    if ((int)OpIdx0 == Src0Idx && !MO0->isReg() &&
+        !isInlineConstant(*MO0, OpInfo1))
+      return false;
+    if ((int)OpIdx1 == Src0Idx && !MO1->isReg() &&
+        !isInlineConstant(*MO1, OpInfo0))
+      return false;
+  }
+
+  if ((int)OpIdx1 != Src0Idx && MO0->isReg()) {
+    if (!DefinedRC1)
+      return OpInfo1.OperandType == MCOI::OPERAND_UNKNOWN;
+    return isLegalRegOperand(MI, OpIdx1, *MO0);
+  }
+  if ((int)OpIdx0 != Src0Idx && MO1->isReg()) {
+    if (!DefinedRC0)
+      return OpInfo0.OperandType == MCOI::OPERAND_UNKNOWN;
+    return isLegalRegOperand(MI, OpIdx0, *MO1);
+  }
+
+  // No need to check 64-bit literals, since swapping does not bring new
+  // 64-bit literals into the current instruction to fold to 32-bit.
+  return isImmOperandLegal(MI, OpIdx1, *MO0);
+}
+
 MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                                   unsigned Src0Idx,
                                                   unsigned Src1Idx) const {
@@ -2770,21 +2829,20 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
 
   MachineOperand &Src0 = MI.getOperand(Src0Idx);
   MachineOperand &Src1 = MI.getOperand(Src1Idx);
-
+  if (!isLegalToSwap(MI, Src0Idx, &Src0, Src1Idx, &Src1)) {
+    return nullptr;
+  }
   MachineInstr *CommutedMI = nullptr;
   if (Src0.isReg() && Src1.isReg()) {
-    if (isOperandLegal(MI, Src1Idx, &Src0)) {
-      // Be sure to copy the source modifiers to the right place.
-      CommutedMI
-        = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx);
-    }
-
+    // Be sure to copy the source modifiers to the right place.
+    CommutedMI =
+        TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx);
   } else if (Src0.isReg() && !Src1.isReg()) {
-    if (isOperandLegal(MI, Src1Idx, &Src0))
-      CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1);
+    CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1);
   } else if (!Src0.isReg() && Src1.isReg()) {
-    if (isOperandLegal(MI, Src1Idx, &Src0))
-      CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
+    CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
+  } else if (Src0.isImm() && Src1.isImm()) {
+    CommutedMI = swapImmOperands(MI, Src0, Src1);
   } else {
     // FIXME: Found two non registers to commute. This does happen.
     return nullptr;
@@ -5817,6 +5875,49 @@ bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
   return RC->hasSuperClassEq(DRC);
 }
 
+bool SIInstrInfo::isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx,
+                                    const MachineOperand &MO) const {
+  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+  const MCOperandInfo OpInfo = MI.getDesc().operands()[OpIdx];
+  unsigned Opc = MI.getOpcode();
+
+  if (!isLegalRegOperand(MRI, OpInfo, MO))
+    return false;
+
+  // Check AGPR (accumulator register) operand restrictions.
+  bool IsAGPR = RI.isAGPR(MRI, MO.getReg());
+  if (IsAGPR && !ST.hasMAIInsts())
+    return false;
+  if (IsAGPR && (!ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) &&
+      (MI.mayLoad() || MI.mayStore() || isDS(Opc) || isMIMG(Opc)))
+    return false;
+  // Atomics should have both vdst and vdata either vgpr or agpr.
+  const int VDstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
+  const int DataIdx = AMDGPU::getNamedOperandIdx(
+      Opc, isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
+  if ((int)OpIdx == VDstIdx && DataIdx != -1 &&
+      MI.getOperand(DataIdx).isReg() &&
+      RI.isAGPR(MRI, MI.getOperand(DataIdx).getReg()) != IsAGPR)
+    return false;
+  if ((int)OpIdx == DataIdx) {
+    if (VDstIdx != -1 &&
+        RI.isAGPR(MRI, MI.getOperand(VDstIdx).getReg()) != IsAGPR)
+      return false;
+    // DS instructions with 2 src operands also must have tied RC.
+    const int Data1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
+    if (Data1Idx != -1 && MI.getOperand(Data1Idx).isReg() &&
+        RI.isAGPR(MRI, MI.getOperand(Data1Idx).getReg()) != IsAGPR)
+      return false;
+  }
+
+  // Without GFX90A insts, V_ACCVGPR_WRITE_B32_e64 cannot take an SGPR as src0.
+  if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
+      (int)OpIdx == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) &&
+      RI.isSGPRReg(MRI, MO.getReg()))
+    return false;
+  return true;
+}
+
 bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
                                      const MCOperandInfo &OpInfo,
                                      const MachineOperand &MO) const {
@@ -5879,40 +5980,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
   if (MO->isReg()) {
     if (!DefinedRC)
       return OpInfo.OperandType == MCOI::OPERAND_UNKNOWN;
-    if (!isLegalRegOperand(MRI, OpInfo, *MO))
-      return false;
-    bool IsAGPR = RI.isAGPR(MRI, MO->getReg());
-    if (IsAGPR && !ST.hasMAIInsts())
-      return false;
-    unsigned Opc = MI.getOpcode();
-    if (IsAGPR &&
-        (!ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) &&
-        (MI.mayLoad() || MI.mayStore() || isDS(Opc) || isMIMG(Opc)))
-      return false;
-    // Atomics should have both vdst and vdata either vgpr or agpr.
-    const int VDstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
-    const int DataIdx = AMDGPU::getNamedOperandIdx(Opc,
-        isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
-    if ((int)OpIdx == VDstIdx && DataIdx != -1 &&
-        MI.getOperand(DataIdx).isReg() &&
-        RI.isAGPR(MRI, MI.getOperand(DataIdx).getReg()) != IsAGPR)
-      return false;
-    if ((int)OpIdx == DataIdx) {
-      if (VDstIdx != -1 &&
-          RI.isAGPR(MRI, MI.getOperand(VDstIdx).getReg()) != IsAGPR)
-        return false;
-      // DS instructions with 2 src operands also must have tied RC.
-      const int Data1Idx = AMDGPU::getNamedOperandIdx(Opc,
-                                                      AMDGPU::OpName::data1);
-      if (Data1Idx != -1 && MI.getOperand(Data1Idx).isReg() &&
-          RI.isAGPR(MRI, MI.getOperand(Data1Idx).getReg()) != IsAGPR)
-        return false;
-    }
-    if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
-        (int)OpIdx == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) &&
-        RI.isSGPRReg(MRI, MO->getReg()))
-      return false;
-    return true;
+    return isLegalRegOperand(MI, OpIdx, *MO);
   }
 
   if (MO->isImm()) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index d49939bf81b10..933935a86f9f9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -193,7 +193,9 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
   bool swapSourceModifiers(MachineInstr &MI,
                            MachineOperand &Src0, unsigned Src0OpName,
                            MachineOperand &Src1, unsigned Src1OpName) const;
-
+  bool isLegalToSwap(const MachineInstr &MI, unsigned OpIdx0,
+                     const MachineOperand *MO0, unsigned OpIdx1,
+                     const MachineOperand *MO1) const;
   MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                        unsigned OpIdx0,
                                        unsigned OpIdx1) const override;
@@ -280,18 +282,17 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
                     MachineBasicBlock::iterator I, const DebugLoc &DL,
                     Register SrcReg, int Value)  const;
 
-  void storeRegToStackSlot(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator MI, Register SrcReg,
-                           bool isKill, int FrameIndex,
-                           const TargetRegisterClass *RC,
-                           const TargetRegisterInfo *TRI,
-                           Register VReg) const override;
+  void storeRegToStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
+      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
-  void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MI, Register DestReg,
-                            int FrameIndex, const TargetRegisterClass *RC,
-                            const TargetRegisterInfo *TRI,
-                            Register VReg) const override;
+  void loadRegFromStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
+      int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
   bool expandPostRAPseudo(MachineInstr &MI) const override;
 
@@ -1218,11 +1219,13 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
                           const MachineOperand &MO) const;
 
   /// Check if \p MO (a register operand) is a legal register for the
-  /// given operand description.
+  /// given operand description or operand index.
+  /// The operand index version provides more legality checks.
   bool isLegalRegOperand(const MachineRegisterInfo &MRI,
                          const MCOperandInfo &OpInfo,
                          const MachineOperand &MO) const;
-
+  bool isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx,
+                         const MachineOperand &MO) const;
   /// Legalize operands in \p MI by either commuting it or inserting a
   /// copy of src1.
   void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const;
diff --git a/llvm/lib/Target/AMDGPU/SILowerWWMCopies.cpp b/llvm/lib/Target/AMDGPU/SILowerWWMCopies.cpp
index c663820311b8c..ef384c2a1a215 100644
--- a/llvm/lib/Target/AMDGPU/SILowerWWMCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerWWMCopies.cpp
@@ -15,6 +15,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "SILowerWWMCopies.h"
 #include "AMDGPU.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -30,12 +31,30 @@ using namespace llvm;
 
 namespace {
 
-class SILowerWWMCopies : public MachineFunctionPass {
+class SILowerWWMCopies {
+public:
+  SILowerWWMCopies(LiveIntervals *LIS, SlotIndexes *SI, VirtRegMap *VRM)
+      : LIS(LIS), Indexes(SI), VRM(VRM) {}
+  bool run(MachineFunction &MF);
+
+private:
+  bool isSCCLiveAtMI(const MachineInstr &MI);
+  void addToWWMSpills(MachineFunction &MF, Register Reg);
+
+  LiveIntervals *LIS;
+  SlotIndexes *Indexes;
+  VirtRegMap *VRM;
+  const SIRegisterInfo *TRI;
+  const MachineRegisterInfo *MRI;
+  SIMachineFunctionInfo *MFI;
+};
+
+class SILowerWWMCopiesLegacy : public MachineFunctionPass {
 public:
   static char ID;
 
-  SILowerWWMCopies() : MachineFunctionPass(ID) {
-    initializeSILowerWWMCopiesPass(*PassRegistry::getPassRegistry());
+  SILowerWWMCopiesLegacy() : MachineFunctionPass(ID) {
+    initializeSILowerWWMCopiesLegacyPass(*PassRegistry::getPassRegistry());
   }
 
   bool runOnMachineFunction(MachineFunction &MF) override;
@@ -43,34 +62,26 @@ class SILowerWWMCopies : public MachineFunctionPass {
   StringRef getPassName() const override { return "SI Lower WWM Copies"; }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addUsedIfAvailable<LiveIntervalsWrapperPass>();
+    AU.addUsedIfAvailable<SlotIndexesWrapperPass>();
+    AU.addUsedIfAvailable<VirtRegMapWrapperLegacy>();
     AU.setPreservesAll();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
-
-private:
-  bool isSCCLiveAtMI(const MachineInstr &MI);
-  void addToWWMSpills(MachineFunction &MF, Register Reg);
-
-  LiveIntervals *LIS;
-  SlotIndexes *Indexes;
-  VirtRegMap *VRM;
-  const SIRegisterInfo *TRI;
-  const MachineRegisterInfo *MRI;
-  SIMachineFunctionInfo *MFI;
 };
 
 } // End anonymous namespace.
 
-INITIALIZE_PASS_BEGIN(SILowerWWMCopies, DEBUG_TYPE, "SI Lower WWM Copies",
+INITIALIZE_PASS_BEGIN(SILowerWWMCopiesLegacy, DEBUG_TYPE, "SI Lower WWM Copies",
                       false, false)
 INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy)
-INITIALIZE_PASS_END(SILowerWWMCopies, DEBUG_TYPE, "SI Lower WWM Copies", false,
-                    false)
+INITIALIZE_PASS_END(SILowerWWMCopiesLegacy, DEBUG_TYPE, "SI Lower WWM Copies",
+                    false, false)
 
-char SILowerWWMCopies::ID = 0;
+char SILowerWWMCopiesLegacy::ID = 0;
 
-char &llvm::SILowerWWMCopiesID = SILowerWWMCopies::ID;
+char &llvm::SILowerWWMCopiesLegacyID = SILowerWWMCopiesLegacy::ID;
 
 bool SILowerWWMCopies::isSCCLiveAtMI(const MachineInstr &MI) {
   // We can't determine the liveness info if LIS isn't available. Early return
@@ -90,23 +101,44 @@ void SILowerWWMCopies::addToWWMSpills(MachineFunction &MF, Register Reg) {
   if (Reg.isPhysical())
     return;
 
+  // FIXME: VRM may be null here.
   MCRegister PhysReg = VRM->getPhys(Reg);
   assert(PhysReg && "should have allocated a physical register");
 
   MFI->allocateWWMSpill(MF, PhysReg);
 }
 
-bool SILowerWWMCopies::runOnMachineFunction(MachineFunction &MF) {
+bool SILowerWWMCopiesLegacy::runOnMachineFunction(MachineFunction &MF) {
+  auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
+  auto *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
+
+  auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>();
+  auto *Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr;
+
+  auto *VRMWrapper = getAnalysisIfAvailable<VirtRegMapWrapperLegacy>();
+  auto *VRM = VRMWrapper ? &VRMWrapper->getVRM() : nullptr;
+
+  SILowerWWMCopies Impl(LIS, Indexes, VRM);
+  return Impl.run(MF);
+}
+
+PreservedAnalyses
+SILowerWWMCopiesPass::run(MachineFunction &MF,
+                          MachineFunctionAnalysisManager &MFAM) {
+  auto *LIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(MF);
+  auto *Indexes = MFAM.getCachedResult<SlotIndexesAnalysis>(MF);
+  auto *VRM = MFAM.getCachedResult<VirtRegMapAnalysis>(MF);
+
+  SILowerWWMCopies Impl(LIS, Indexes, VRM);
+  Impl.run(MF);
+  return PreservedAnalyses::all();
+}
+
+bool SILowerWWMCopies::run(MachineFunction &MF) {
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
 
   MFI = MF.getInfo<SIMachineFunctionInfo>();
-  auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
-  LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
-  auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>();
-  Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr;
-  auto *VRMWrapper = getAnalysisIfAvailable<VirtRegMapWrapperLegacy>();
-  VRM = VRMWrapper ? &VRMWrapper->getVRM() : nullptr;
   TRI = ST.getRegisterInfo();
   MRI = &MF.getRegInfo();
 
diff --git a/llvm/lib/Target/AMDGPU/SILowerWWMCopies.h b/llvm/lib/Target/AMDGPU/SILowerWWMCopies.h
new file mode 100644
index 0000000000000..cfc8100901760
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SILowerWWMCopies.h
@@ -0,0 +1,22 @@
+//===- SILowerWWMCopies.h ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_SILOWERWWMCOPIES_H
+#define LLVM_LIB_TARGET_AMDGPU_SILOWERWWMCOPIES_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+class SILowerWWMCopiesPass : public PassInfoMixin<SILowerWWMCopiesPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_SILOWERWWMCOPIES_H
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 319ada3b27bd5..d9c0aa300855f 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -2487,6 +2487,7 @@ unsigned getRegBitWidth(unsigned RCID) {
   case AMDGPU::AReg_128_Align2RegClassID:
   case AMDGPU::AV_128RegClassID:
   case AMDGPU::AV_128_Align2RegClassID:
+  case AMDGPU::SReg_128_XNULLRegClassID:
     return 128;
   case AMDGPU::SGPR_160RegClassID:
   case AMDGPU::SReg_160RegClassID:
@@ -2523,6 +2524,7 @@ unsigned getRegBitWidth(unsigned RCID) {
   case AMDGPU::AReg_256_Align2RegClassID:
   case AMDGPU::AV_256RegClassID:
   case AMDGPU::AV_256_Align2RegClassID:
+  case AMDGPU::SReg_256_XNULLRegClassID:
     return 256;
   case AMDGPU::SGPR_288RegClassID:
   case AMDGPU::SReg_288RegClassID:
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
index 4ad26ee895c7d..1e76bf7056cc8 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
@@ -232,7 +232,8 @@ void AMDGPUPALMetadata::setEntryPoint(unsigned CC, StringRef Name) {
   if (isLegacy())
     return;
   // Msgpack format.
-  getHwStage(CC)[".entry_point"] = MsgPackDoc.getNode(Name, /*Copy=*/true);
+  getHwStage(CC)[".entry_point_symbol"] =
+      MsgPackDoc.getNode(Name, /*Copy=*/true);
 }
 
 // Set the number of used vgprs in the metadata. This is an optional
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index e96369b5e6e24..947ac5c27620f 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -335,7 +335,9 @@ let isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU] in {
 let FPDPRounding = 1 in {
   let Predicates = [Has16BitInsts, isGFX8Only] in {
     defm V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUdiv_fixup>;
-    defm V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, any_fma>;
+    let isCommutable = 1 in {
+      defm V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, any_fma>;
+    } // End isCommutable = 1
   } // End Predicates = [Has16BitInsts, isGFX8Only]
 
   let SubtargetPredicate = isGFX9Plus in {
@@ -639,8 +641,10 @@ let SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0 in {
 defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>;
 defm V_SUB_I16 : VOP3Inst_t16 <"v_sub_i16", VOP_I16_I16_I16>;
 
-defm V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
-defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
+let isCommutable = 1 in {
+  defm V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
+  defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
+} // End isCommutable = 1
 
 defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>;
 defm V_CVT_PKNORM_U16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_u16_f16", VOP_B32_F16_F16>;
@@ -1254,8 +1258,9 @@ let SubtargetPredicate = isGFX10Plus in {
     def : PermlanePat<int_amdgcn_permlane16, V_PERMLANE16_B32_e64, vt>;
     def : PermlanePat<int_amdgcn_permlanex16, V_PERMLANEX16_B32_e64, vt>;
   }
-
-  defm V_ADD_NC_U16 : VOP3Inst_t16 <"v_add_nc_u16", VOP_I16_I16_I16, add>;
+  let isCommutable = 1 in {
+    defm V_ADD_NC_U16 : VOP3Inst_t16 <"v_add_nc_u16", VOP_I16_I16_I16, add>;
+  } // End isCommutable = 1
   defm V_SUB_NC_U16 : VOP3Inst_t16 <"v_sub_nc_u16", VOP_I16_I16_I16, sub>;
 
 } // End SubtargetPredicate = isGFX10Plus
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index da70525b2bc29..91ad2cafe9b54 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -899,25 +899,40 @@ class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType src0VT, ValueType
 
 multiclass VOPC_Class_Profile_t16<list<SchedReadWrite> sched> {
   def NAME : VOPC_Class_Profile<sched, f16>;
-  def _t16 : VOPC_Class_Profile<sched, f16, i16> {
+  def _t16 : VOPC_Class_Profile_Base<sched, f16, f16> {
     let IsTrue16 = 1;
     let IsRealTrue16 = 1;
-    let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
-    let Src1RC64 = VSrc_b32;
-    let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
-    let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
-    let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
-    let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
-    let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
-    let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
+    let HasOpSel = 1;
+    let HasModifiers = 1; // All instructions at least have OpSel
+    let DstRC = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 0 /*IsVOP3Encoding*/>.ret;
+    let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+    let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+    let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+    let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+    let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+    let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0/*IsFake16*/>.ret;
+    let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0/*IsFake16*/>.ret;
+    let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0/*IsFake16*/>.ret;
+    let Src0VOP3DPP = VGPRSrc_16;
+    let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret;
+    let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0/*IsFake16*/>.ret;
+
+    let DstRC64 = getVALUDstForVT<DstVT, 1/*IsTrue16*/, 1/*IsVOP3Encoding*/>.ret;
+    let Src0RC64 = getVOP3SrcForVT<Src0VT, 1/*IsTrue16*/>.ret;
+    let Src1RC64 = getVOP3SrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
+    let Src2RC64 = getVOP3SrcForVT<Src2VT, 1/*IsTrue16*/>.ret;
+    let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+    let Src1Mod = getSrcMod<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+    let Src2Mod = getSrcMod<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+    let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 0/*IsFake16*/>.ret;
+    let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0/*IsFake16*/>.ret;
+    let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0/*IsFake16*/>.ret;
   }
   def _fake16 : VOPC_Class_Profile_Base<sched, f16, f16> {
     let IsTrue16 = 1;
     let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
-    let DstRC64 = getVALUDstForVT<DstVT>.ret;
     let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
     let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
-    let Src1RC64 = VSrc_b32;
     let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
     let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
     let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
@@ -927,6 +942,14 @@ multiclass VOPC_Class_Profile_t16<list<SchedReadWrite> sched> {
     let Src0VOP3DPP = VGPRSrc_32;
     let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
     let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret;
+
+    let DstRC64 = getVALUDstForVT<DstVT>.ret;
+    let Src0RC64 = getVOP3SrcForVT<Src0VT, 0/*IsTrue16*/>.ret;
+    let Src1RC64 = getVOP3SrcForVT<Src1VT, 0/*IsTrue16*/>.ret;
+    let Src2RC64 = getVOP3SrcForVT<Src2VT, 0/*IsTrue16*/>.ret;
+    let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
+    let Src1Mod = getSrcMod<Src1VT, 0/*IsTrue16*/, 1/*IsFake16*/>.ret;
+    let Src2Mod = getSrcMod<Src2VT, 0/*IsTrue16*/, 1/*IsFake16*/>.ret;
     let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
     let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
     let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1/*IsFake16*/>.ret;
@@ -1838,22 +1861,22 @@ multiclass VOPCX_Real_t16_and_fake16_gfx11_gfx12<bits<9> op, string asm_name,
   defm _fake16: VOPCX_Real_t16_gfx11_gfx12<op, asm_name, OpName#"_fake16", pseudo_mnemonic>;
 }
 
-defm V_CMP_F_F16_fake16      : VOPC_Real_t16_gfx11<0x000, "v_cmp_f_f16">;
+defm V_CMP_F_F16             : VOPC_Real_t16_and_fake16_gfx11<0x000, "v_cmp_f_f16">;
 defm V_CMP_LT_F16            : VOPC_Real_t16_and_fake16_gfx11_gfx12<0x001, "v_cmp_lt_f16">;
-defm V_CMP_EQ_F16_fake16     : VOPC_Real_t16_gfx11_gfx12<0x002, "v_cmp_eq_f16">;
-defm V_CMP_LE_F16_fake16     : VOPC_Real_t16_gfx11_gfx12<0x003, "v_cmp_le_f16">;
-defm V_CMP_GT_F16_fake16     : VOPC_Real_t16_gfx11_gfx12<0x004, "v_cmp_gt_f16">;
-defm V_CMP_LG_F16_fake16     : VOPC_Real_t16_gfx11_gfx12<0x005, "v_cmp_lg_f16">;
-defm V_CMP_GE_F16_fake16     : VOPC_Real_t16_gfx11_gfx12<0x006, "v_cmp_ge_f16">;
-defm V_CMP_O_F16_fake16      : VOPC_Real_t16_gfx11_gfx12<0x007, "v_cmp_o_f16">;
-defm V_CMP_U_F16_fake16      : VOPC_Real_t16_gfx11_gfx12<0x008, "v_cmp_u_f16">;
-defm V_CMP_NGE_F16_fake16    : VOPC_Real_t16_gfx11_gfx12<0x009, "v_cmp_nge_f16">;
-defm V_CMP_NLG_F16_fake16    : VOPC_Real_t16_gfx11_gfx12<0x00a, "v_cmp_nlg_f16">;
-defm V_CMP_NGT_F16_fake16    : VOPC_Real_t16_gfx11_gfx12<0x00b, "v_cmp_ngt_f16">;
-defm V_CMP_NLE_F16_fake16    : VOPC_Real_t16_gfx11_gfx12<0x00c, "v_cmp_nle_f16">;
-defm V_CMP_NEQ_F16_fake16    : VOPC_Real_t16_gfx11_gfx12<0x00d, "v_cmp_neq_f16">;
-defm V_CMP_NLT_F16_fake16    : VOPC_Real_t16_gfx11_gfx12<0x00e, "v_cmp_nlt_f16">;
-defm V_CMP_T_F16_fake16      : VOPC_Real_t16_gfx11<0x00f, "v_cmp_t_f16", "V_CMP_TRU_F16_fake16", "v_cmp_tru_f16">;
+defm V_CMP_EQ_F16            : VOPC_Real_t16_and_fake16_gfx11_gfx12<0x002, "v_cmp_eq_f16">;
+defm V_CMP_LE_F16            : VOPC_Real_t16_and_fake16_gfx11_gfx12<0x003, "v_cmp_le_f16">;
+defm V_CMP_GT_F16            : VOPC_Real_t16_and_fake16_gfx11_gfx12<0x004, "v_cmp_gt_f16">;
+defm V_CMP_LG_F16            : VOPC_Real_t16_and_fake16_gfx11_gfx12<0x005, "v_cmp_lg_f16">;
+defm V_CMP_GE_F16            : VOPC_Real_t16_and_fake16_gfx11_gfx12<0x006, "v_cmp_ge_f16">;
+defm V_CMP_O_F16             : VOPC_Real_t16_and_fake16_gfx11_gfx12<0x007, "v_cmp_o_f16">;
+defm V_CMP_U_F16             : VOPC_Real_t16_and_fake16_gfx11_gfx12<0x008, "v_cmp_u_f16">;
+defm V_CMP_NGE_F16           : VOPC_Real_t16_and_fake16_gfx11_gfx12<0x009, "v_cmp_nge_f16">;
+defm V_CMP_NLG_F16           : VOPC_Real_t16_and_fake16_gfx11_gfx12<0x00a, "v_cmp_nlg_f16">;
+defm V_CMP_NGT_F16           : VOPC_Real_t16_and_fake16_gfx11_gfx12<0x00b, "v_cmp_ngt_f16">;
+defm V_CMP_NLE_F16           : VOPC_Real_t16_and_fake16_gfx11_gfx12<0x00c, "v_cmp_nle_f16">;
+defm V_CMP_NEQ_F16           : VOPC_Real_t16_and_fake16_gfx11_gfx12<0x00d, "v_cmp_neq_f16">;
+defm V_CMP_NLT_F16           : VOPC_Real_t16_and_fake16_gfx11_gfx12<0x00e, "v_cmp_nlt_f16">;
+defm V_CMP_T_F16             : VOPC_Real_t16_and_fake16_gfx11<0x00f, "v_cmp_t_f16", "V_CMP_TRU_F16", "v_cmp_tru_f16">;
 
 defm V_CMP_F_F32      : VOPC_Real_gfx11<0x010>;
 defm V_CMP_LT_F32     : VOPC_Real_gfx11_gfx12<0x011>;
@@ -1920,7 +1943,7 @@ defm V_CMP_NE_U64     : VOPC_Real_gfx11_gfx12<0x05d>;
 defm V_CMP_GE_U64     : VOPC_Real_gfx11_gfx12<0x05e>;
 defm V_CMP_T_U64      : VOPC_Real_gfx11<0x05f>;
 
-defm V_CMP_CLASS_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x07d, "v_cmp_class_f16">;
+defm V_CMP_CLASS_F16     : VOPC_Real_t16_and_fake16_gfx11_gfx12<0x07d, "v_cmp_class_f16">;
 defm V_CMP_CLASS_F32     : VOPC_Real_gfx11_gfx12<0x07e>;
 defm V_CMP_CLASS_F64     : VOPC_Real_gfx11_gfx12<0x07f>;
 
diff --git a/llvm/lib/Target/ARC/ARCInstrInfo.cpp b/llvm/lib/Target/ARC/ARCInstrInfo.cpp
index 78db68fca3050..aa30c8a2a96da 100644
--- a/llvm/lib/Target/ARC/ARCInstrInfo.cpp
+++ b/llvm/lib/Target/ARC/ARCInstrInfo.cpp
@@ -294,7 +294,8 @@ void ARCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
 void ARCInstrInfo::storeRegToStackSlot(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg,
     bool IsKill, int FrameIndex, const TargetRegisterClass *RC,
-    const TargetRegisterInfo *TRI, Register VReg) const {
+    const TargetRegisterInfo *TRI, Register VReg,
+    MachineInstr::MIFlag Flags) const {
   DebugLoc DL = MBB.findDebugLoc(I);
   MachineFunction &MF = *MBB.getParent();
   MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -323,7 +324,8 @@ void ARCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                         Register DestReg, int FrameIndex,
                                         const TargetRegisterClass *RC,
                                         const TargetRegisterInfo *TRI,
-                                        Register VReg) const {
+                                        Register VReg,
+                                        MachineInstr::MIFlag Flags) const {
   DebugLoc DL = MBB.findDebugLoc(I);
   MachineFunction &MF = *MBB.getParent();
   MachineFrameInfo &MFI = MF.getFrameInfo();
diff --git a/llvm/lib/Target/ARC/ARCInstrInfo.h b/llvm/lib/Target/ARC/ARCInstrInfo.h
index e25f990252260..8861b4689925f 100644
--- a/llvm/lib/Target/ARC/ARCInstrInfo.h
+++ b/llvm/lib/Target/ARC/ARCInstrInfo.h
@@ -68,18 +68,17 @@ class ARCInstrInfo : public ARCGenInstrInfo {
                    bool KillSrc, bool RenamableDest = false,
                    bool RenamableSrc = false) const override;
 
-  void storeRegToStackSlot(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator MI, Register SrcReg,
-                           bool IsKill, int FrameIndex,
-                           const TargetRegisterClass *RC,
-                           const TargetRegisterInfo *TRI,
-                           Register VReg) const override;
-
-  void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MI, Register DestReg,
-                            int FrameIndex, const TargetRegisterClass *RC,
-                            const TargetRegisterInfo *TRI,
-                            Register VReg) const override;
+  void storeRegToStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
+      bool IsKill, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
+
+  void loadRegFromStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
+      int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
   bool
   reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 2bca2c08c3454..c167c1897bc91 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1118,7 +1118,8 @@ void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                            Register SrcReg, bool isKill, int FI,
                                            const TargetRegisterClass *RC,
                                            const TargetRegisterInfo *TRI,
-                                           Register VReg) const {
+                                           Register VReg,
+                                           MachineInstr::MIFlag Flags) const {
   MachineFunction &MF = *MBB.getParent();
   MachineFrameInfo &MFI = MF.getFrameInfo();
   Align Alignment = MFI.getObjectAlign(FI);
@@ -1379,12 +1380,10 @@ Register ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
   return false;
 }
 
-void ARMBaseInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
-                                            MachineBasicBlock::iterator I,
-                                            Register DestReg, int FI,
-                                            const TargetRegisterClass *RC,
-                                            const TargetRegisterInfo *TRI,
-                                            Register VReg) const {
+void ARMBaseInstrInfo::loadRegFromStackSlot(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg,
+    int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+    Register VReg, MachineInstr::MIFlag Flags) const {
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
   MachineFunction &MF = *MBB.getParent();
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 9422e12c5dfc5..ae760e881e7fa 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -212,18 +212,17 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo {
                    bool KillSrc, bool RenamableDest = false,
                    bool RenamableSrc = false) const override;
 
-  void storeRegToStackSlot(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator MBBI, Register SrcReg,
-                           bool isKill, int FrameIndex,
-                           const TargetRegisterClass *RC,
-                           const TargetRegisterInfo *TRI,
-                           Register VReg) const override;
-
-  void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MBBI, Register DestReg,
-                            int FrameIndex, const TargetRegisterClass *RC,
-                            const TargetRegisterInfo *TRI,
-                            Register VReg) const override;
+  void storeRegToStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
+      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
+
+  void loadRegFromStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+      Register DestReg, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
   bool expandPostRAPseudo(MachineInstr &MI) const override;
 
diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index a38aa3de40d90..cf151e1f8458f 100644
--- a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -117,7 +117,8 @@ void Thumb1InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                           Register SrcReg, bool isKill, int FI,
                                           const TargetRegisterClass *RC,
                                           const TargetRegisterInfo *TRI,
-                                          Register VReg) const {
+                                          Register VReg,
+                                          MachineInstr::MIFlag Flags) const {
   assert((RC == &ARM::tGPRRegClass ||
           (SrcReg.isPhysical() && isARMLowRegister(SrcReg))) &&
          "Unknown regclass!");
@@ -141,12 +142,10 @@ void Thumb1InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
   }
 }
 
-void Thumb1InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
-                                           MachineBasicBlock::iterator I,
-                                           Register DestReg, int FI,
-                                           const TargetRegisterClass *RC,
-                                           const TargetRegisterInfo *TRI,
-                                           Register VReg) const {
+void Thumb1InstrInfo::loadRegFromStackSlot(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg,
+    int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+    Register VReg, MachineInstr::MIFlag Flags) const {
   assert((RC->hasSuperClassEq(&ARM::tGPRRegClass) ||
           (DestReg.isPhysical() && isARMLowRegister(DestReg))) &&
          "Unknown regclass!");
diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.h b/llvm/lib/Target/ARM/Thumb1InstrInfo.h
index 84241fb8a9a66..b9eb58692bab0 100644
--- a/llvm/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.h
@@ -41,18 +41,17 @@ class Thumb1InstrInfo : public ARMBaseInstrInfo {
                    const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
                    bool KillSrc, bool RenamableDest = false,
                    bool RenamableSrc = false) const override;
-  void storeRegToStackSlot(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator MBBI, Register SrcReg,
-                           bool isKill, int FrameIndex,
-                           const TargetRegisterClass *RC,
-                           const TargetRegisterInfo *TRI,
-                           Register VReg) const override;
-
-  void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MBBI, Register DestReg,
-                            int FrameIndex, const TargetRegisterClass *RC,
-                            const TargetRegisterInfo *TRI,
-                            Register VReg) const override;
+  void storeRegToStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
+      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
+
+  void loadRegFromStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+      Register DestReg, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
   bool canCopyGluedNodeDuringSchedule(SDNode *N) const override;
 private:
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index 4d759d3bd5a3c..3e7bd05b1d9d1 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -166,7 +166,8 @@ void Thumb2InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                           Register SrcReg, bool isKill, int FI,
                                           const TargetRegisterClass *RC,
                                           const TargetRegisterInfo *TRI,
-                                          Register VReg) const {
+                                          Register VReg,
+                                          MachineInstr::MIFlag Flags) const {
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
 
@@ -206,12 +207,10 @@ void Thumb2InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                         Register());
 }
 
-void Thumb2InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
-                                           MachineBasicBlock::iterator I,
-                                           Register DestReg, int FI,
-                                           const TargetRegisterClass *RC,
-                                           const TargetRegisterInfo *TRI,
-                                           Register VReg) const {
+void Thumb2InstrInfo::loadRegFromStackSlot(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg,
+    int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+    Register VReg, MachineInstr::MIFlag Flags) const {
   MachineFunction &MF = *MBB.getParent();
   MachineFrameInfo &MFI = MF.getFrameInfo();
   MachineMemOperand *MMO = MF.getMachineMemOperand(
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/llvm/lib/Target/ARM/Thumb2InstrInfo.h
index 70ee3270e64ac..7e751c5b6bb9e 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.h
@@ -42,18 +42,17 @@ class Thumb2InstrInfo : public ARMBaseInstrInfo {
                    bool KillSrc, bool RenamableDest = false,
                    bool RenamableSrc = false) const override;
 
-  void storeRegToStackSlot(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator MBBI, Register SrcReg,
-                           bool isKill, int FrameIndex,
-                           const TargetRegisterClass *RC,
-                           const TargetRegisterInfo *TRI,
-                           Register VReg) const override;
-
-  void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MBBI, Register DestReg,
-                            int FrameIndex, const TargetRegisterClass *RC,
-                            const TargetRegisterInfo *TRI,
-                            Register VReg) const override;
+  void storeRegToStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
+      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
+
+  void loadRegFromStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+      Register DestReg, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
   /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info.  As
   /// such, whenever a client has an instance of instruction info, it should
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/llvm/lib/Target/AVR/AVRInstrInfo.cpp
index 7d58ece95c869..5ba83b616db37 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.cpp
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.cpp
@@ -127,7 +127,8 @@ Register AVRInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
 void AVRInstrInfo::storeRegToStackSlot(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
     bool isKill, int FrameIndex, const TargetRegisterClass *RC,
-    const TargetRegisterInfo *TRI, Register VReg) const {
+    const TargetRegisterInfo *TRI, Register VReg,
+    MachineInstr::MIFlag Flags) const {
   MachineFunction &MF = *MBB.getParent();
   AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
 
@@ -161,7 +162,8 @@ void AVRInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                         Register DestReg, int FrameIndex,
                                         const TargetRegisterClass *RC,
                                         const TargetRegisterInfo *TRI,
-                                        Register VReg) const {
+                                        Register VReg,
+                                        MachineInstr::MIFlag Flags) const {
   MachineFunction &MF = *MBB.getParent();
   const MachineFrameInfo &MFI = MF.getFrameInfo();
 
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.h b/llvm/lib/Target/AVR/AVRInstrInfo.h
index 8eb4292f2422d..452160a06663d 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.h
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.h
@@ -77,17 +77,16 @@ class AVRInstrInfo : public AVRGenInstrInfo {
                    const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
                    bool KillSrc, bool RenamableDest = false,
                    bool RenamableSrc = false) const override;
-  void storeRegToStackSlot(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator MI, Register SrcReg,
-                           bool isKill, int FrameIndex,
-                           const TargetRegisterClass *RC,
-                           const TargetRegisterInfo *TRI,
-                           Register VReg) const override;
-  void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MI, Register DestReg,
-                            int FrameIndex, const TargetRegisterClass *RC,
-                            const TargetRegisterInfo *TRI,
-                            Register VReg) const override;
+  void storeRegToStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
+      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
+  void loadRegFromStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
+      int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
   Register isLoadFromStackSlot(const MachineInstr &MI,
                                int &FrameIndex) const override;
   Register isStoreToStackSlot(const MachineInstr &MI,
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.cpp b/llvm/lib/Target/BPF/BPFInstrInfo.cpp
index 1b07e7ffc0d31..c79fb99ba5cf4 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.cpp
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.cpp
@@ -127,7 +127,8 @@ void BPFInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                        Register SrcReg, bool IsKill, int FI,
                                        const TargetRegisterClass *RC,
                                        const TargetRegisterInfo *TRI,
-                                       Register VReg) const {
+                                       Register VReg,
+                                       MachineInstr::MIFlag Flags) const {
   DebugLoc DL;
   if (I != MBB.end())
     DL = I->getDebugLoc();
@@ -146,12 +147,10 @@ void BPFInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
     llvm_unreachable("Can't store this register to stack slot");
 }
 
-void BPFInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
-                                        MachineBasicBlock::iterator I,
-                                        Register DestReg, int FI,
-                                        const TargetRegisterClass *RC,
-                                        const TargetRegisterInfo *TRI,
-                                        Register VReg) const {
+void BPFInstrInfo::loadRegFromStackSlot(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg,
+    int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+    Register VReg, MachineInstr::MIFlag Flags) const {
   DebugLoc DL;
   if (I != MBB.end())
     DL = I->getDebugLoc();
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.h b/llvm/lib/Target/BPF/BPFInstrInfo.h
index a6b6fd7dc4d96..904d94d47e476 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.h
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.h
@@ -36,18 +36,17 @@ class BPFInstrInfo : public BPFGenInstrInfo {
 
   bool expandPostRAPseudo(MachineInstr &MI) const override;
 
-  void storeRegToStackSlot(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator MBBI, Register SrcReg,
-                           bool isKill, int FrameIndex,
-                           const TargetRegisterClass *RC,
-                           const TargetRegisterInfo *TRI,
-                           Register VReg) const override;
+  void storeRegToStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
+      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
-  void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MBBI, Register DestReg,
-                            int FrameIndex, const TargetRegisterClass *RC,
-                            const TargetRegisterInfo *TRI,
-                            Register VReg) const override;
+  void loadRegFromStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+      Register DestReg, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
   bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                      MachineBasicBlock *&FBB,
                      SmallVectorImpl<MachineOperand> &Cond,
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
index a2bb87bcaaf94..75d581382fe5f 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
@@ -393,7 +393,8 @@ void CSKYInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                         Register SrcReg, bool IsKill, int FI,
                                         const TargetRegisterClass *RC,
                                         const TargetRegisterInfo *TRI,
-                                        Register VReg) const {
+                                        Register VReg,
+                                        MachineInstr::MIFlag Flags) const {
   DebugLoc DL;
   if (I != MBB.end())
     DL = I->getDebugLoc();
@@ -432,12 +433,10 @@ void CSKYInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
       .addMemOperand(MMO);
 }
 
-void CSKYInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
-                                         MachineBasicBlock::iterator I,
-                                         Register DestReg, int FI,
-                                         const TargetRegisterClass *RC,
-                                         const TargetRegisterInfo *TRI,
-                                         Register VReg) const {
+void CSKYInstrInfo::loadRegFromStackSlot(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg,
+    int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+    Register VReg, MachineInstr::MIFlag Flags) const {
   DebugLoc DL;
   if (I != MBB.end())
     DL = I->getDebugLoc();
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.h b/llvm/lib/Target/CSKY/CSKYInstrInfo.h
index 54c1106310d85..3e0166ecf8e0a 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo.h
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.h
@@ -40,18 +40,17 @@ class CSKYInstrInfo : public CSKYGenInstrInfo {
   Register isStoreToStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
 
-  void storeRegToStackSlot(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator MI, Register SrcReg,
-                           bool IsKill, int FrameIndex,
-                           const TargetRegisterClass *RC,
-                           const TargetRegisterInfo *TRI,
-                           Register VReg) const override;
-
-  void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MI, Register DestReg,
-                            int FrameIndex, const TargetRegisterClass *RC,
-                            const TargetRegisterInfo *TRI,
-                            Register VReg) const override;
+  void storeRegToStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
+      bool IsKill, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
+
+  void loadRegFromStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
+      int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
   void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                    const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 4b20a64cb0722..beb9b56dba30a 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -268,18 +268,29 @@ def miss : DXILShaderStage;
 def all_stages : DXILShaderStage;
 // Denote support for DXIL Op to have been removed
 def removed : DXILShaderStage;
+
 // DXIL Op attributes
 
+// A function attribute denotes that there is a corresponding LLVM function
+// attribute that will be set when building the DXIL op. The mapping is defined
+// by setDXILAttributes in DXILOpBuilder.cpp
 class DXILAttribute;
 
-def ReadOnly : DXILAttribute;
 def ReadNone : DXILAttribute;
-def IsDerivative : DXILAttribute;
-def IsGradient : DXILAttribute;
-def IsFeedback : DXILAttribute;
-def IsWave : DXILAttribute;
-def NeedsUniformInputs : DXILAttribute;
-def IsBarrier : DXILAttribute;
+def ReadOnly : DXILAttribute;
+def NoDuplicate : DXILAttribute;
+def NoReturn : DXILAttribute;
+
+// A property is simply used to mark that a DXIL op belongs to a sub-group of
+// DXIL ops, and it is used to query if a particular op holds this property.
+// This is used for the static analysis of DXIL ops.
+class DXILProperty;
+
+def IsBarrier : DXILProperty;
+def IsGradient : DXILProperty;
+def IsFeedback : DXILProperty;
+def IsWave : DXILProperty;
+def RequiresUniformInputs : DXILProperty;
 
 class Overloads<Version ver, list<DXILOpParamType> ols> {
   Version dxil_version = ver;
@@ -293,7 +304,7 @@ class Stages<Version ver, list<DXILShaderStage> st> {
 
 class Attributes<Version ver = DXIL1_0, list<DXILAttribute> attrs> {
   Version dxil_version = ver;
-  list<DXILAttribute> op_attrs = attrs;
+  list<DXILAttribute> fn_attrs = attrs;
 }
 
 defvar BarrierMode_DeviceMemoryBarrier              = 2;
@@ -386,6 +397,9 @@ class DXILOp<int opcode, DXILOpClass opclass> {
 
   // Versioned attributes of operation
   list<Attributes> attributes = [];
+
+  // List of properties. Default to no properties.
+  list<DXILProperty> properties = [];
 }
 
 // Concrete definitions of DXIL Operations
@@ -805,6 +819,10 @@ def CreateHandle : DXILOp<57, createHandle> {
   let arguments = [Int8Ty, Int32Ty, Int32Ty, Int1Ty];
   let result = HandleTy;
   let stages = [Stages<DXIL1_0, [all_stages]>, Stages<DXIL1_6, [removed]>];
+  // NOTE: The ReadOnly attribute was set for consistency with DXC. However,
+  // ReadNone may describe this op more appropriately, so consider changing it
+  // in the future.
+  let attributes = [Attributes<DXIL1_0, [ReadOnly]>];
 }
 
 def BufferLoad : DXILOp<68, bufferLoad> {
@@ -816,6 +834,7 @@ def BufferLoad : DXILOp<68, bufferLoad> {
       [Overloads<DXIL1_0,
                  [ResRetHalfTy, ResRetFloatTy, ResRetInt16Ty, ResRetInt32Ty]>];
   let stages = [Stages<DXIL1_0, [all_stages]>];
+  let attributes = [Attributes<DXIL1_0, [ReadOnly]>];
 }
 
 def BufferStore : DXILOp<69, bufferStore> {
@@ -844,6 +863,7 @@ def CheckAccessFullyMapped : DXILOp<71, checkAccessFullyMapped> {
   let result = Int1Ty;
   let overloads = [Overloads<DXIL1_0, [Int32Ty]>];
   let stages = [Stages<DXIL1_0, [all_stages]>];
+  let attributes = [Attributes<DXIL1_0, [ReadOnly]>];
 }
 
 def Discard : DXILOp<82, discard> {
@@ -955,8 +975,8 @@ def Dot4AddI8Packed : DXILOp<163, dot4AddPacked> {
   let intrinsics = [ IntrinSelect<int_dx_dot4add_i8packed> ];
   let arguments = [Int32Ty, Int32Ty, Int32Ty];
   let result = Int32Ty;
-  let attributes = [Attributes<DXIL1_0, [ReadNone]>];
   let stages = [Stages<DXIL1_0, [all_stages]>];
+  let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
 def Dot4AddU8Packed : DXILOp<164, dot4AddPacked> {
@@ -965,8 +985,8 @@ def Dot4AddU8Packed : DXILOp<164, dot4AddPacked> {
   let intrinsics = [ IntrinSelect<int_dx_dot4add_u8packed> ];
   let arguments = [Int32Ty, Int32Ty, Int32Ty];
   let result = Int32Ty;
-  let attributes = [Attributes<DXIL1_0, [ReadNone]>];
   let stages = [Stages<DXIL1_0, [all_stages]>];
+  let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
 def AnnotateHandle : DXILOp<216, annotateHandle> {
@@ -974,6 +994,7 @@ def AnnotateHandle : DXILOp<216, annotateHandle> {
   let arguments = [HandleTy, ResPropsTy];
   let result = HandleTy;
   let stages = [Stages<DXIL1_6, [all_stages]>];
+  let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
 def CreateHandleFromBinding : DXILOp<217, createHandleFromBinding> {
@@ -981,6 +1002,7 @@ def CreateHandleFromBinding : DXILOp<217, createHandleFromBinding> {
   let arguments = [ResBindTy, Int32Ty, Int1Ty];
   let result = HandleTy;
   let stages = [Stages<DXIL1_6, [all_stages]>];
+  let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
 def WaveActiveAllTrue : DXILOp<114, waveAllTrue> {
@@ -989,6 +1011,7 @@ def WaveActiveAllTrue : DXILOp<114, waveAllTrue> {
   let arguments = [Int1Ty];
   let result = Int1Ty;
   let stages = [Stages<DXIL1_0, [all_stages]>];
+  let properties = [IsWave];
 }
 
 def WaveActiveAnyTrue : DXILOp<113, waveAnyTrue> {
@@ -997,6 +1020,7 @@ def WaveActiveAnyTrue : DXILOp<113, waveAnyTrue> {
   let arguments = [Int1Ty];
   let result = Int1Ty;
   let stages = [Stages<DXIL1_0, [all_stages]>];
+  let properties = [IsWave];
 }
 
 def WaveActiveOp : DXILOp<119, waveActiveOp> {
@@ -1023,7 +1047,7 @@ def WaveIsFirstLane :  DXILOp<110, waveIsFirstLane> {
   let arguments = [];
   let result = Int1Ty;
   let stages = [Stages<DXIL1_0, [all_stages]>];
-  let attributes = [Attributes<DXIL1_0, [ReadNone]>];
+  let properties = [IsWave];
 }
 
 def WaveReadLaneAt:  DXILOp<117, waveReadLaneAt> {
@@ -1033,7 +1057,7 @@ def WaveReadLaneAt:  DXILOp<117, waveReadLaneAt> {
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy, DoubleTy, Int1Ty, Int16Ty, Int32Ty, Int64Ty]>];
   let stages = [Stages<DXIL1_0, [all_stages]>];
-  let attributes = [Attributes<DXIL1_0, [ReadNone]>];
+  let properties = [IsWave];
 }
 
 def WaveGetLaneIndex : DXILOp<111, waveGetLaneIndex> {
@@ -1042,7 +1066,8 @@ def WaveGetLaneIndex : DXILOp<111, waveGetLaneIndex> {
   let arguments = [];
   let result = Int32Ty;
   let stages = [Stages<DXIL1_0, [all_stages]>];
-  let attributes = [Attributes<DXIL1_0, [ReadNone]>];
+  let attributes = [Attributes<DXIL1_0, [ReadOnly]>];
+  let properties = [IsWave];
 }
 
 def WaveAllBitCount : DXILOp<135, waveAllOp> {
@@ -1051,7 +1076,7 @@ def WaveAllBitCount : DXILOp<135, waveAllOp> {
   let arguments = [Int1Ty];
   let result = Int32Ty;
   let stages = [Stages<DXIL1_0, [all_stages]>];
-  let attributes = [Attributes<DXIL1_0, [ReadNone]>];
+  let properties = [IsWave];
 }
 
 def Barrier : DXILOp<80, barrier> {
@@ -1066,4 +1091,5 @@ def Barrier : DXILOp<80, barrier> {
   let result = VoidTy;
   let stages = [Stages<DXIL1_0, [compute, library]>];
   let attributes = [Attributes<DXIL1_0, []>];
+  let properties = [IsBarrier];
 }
diff --git a/llvm/lib/Target/DirectX/DXILConstants.h b/llvm/lib/Target/DirectX/DXILConstants.h
index 022cd57795a06..3ba74e7570fd7 100644
--- a/llvm/lib/Target/DirectX/DXILConstants.h
+++ b/llvm/lib/Target/DirectX/DXILConstants.h
@@ -30,6 +30,28 @@ enum class OpParamType : unsigned {
 #include "DXILOperation.inc"
 };
 
+struct Attributes {
+#define DXIL_ATTRIBUTE(Name) bool Name = false;
+#include "DXILOperation.inc"
+};
+
+inline Attributes operator|(Attributes a, Attributes b) {
+  Attributes c;
+#define DXIL_ATTRIBUTE(Name) c.Name = a.Name | b.Name;
+#include "DXILOperation.inc"
+  return c;
+}
+
+// In-place union; the right-hand side is const so temporaries bind too.
+inline Attributes &operator|=(Attributes &a, const Attributes &b) {
+  return a = a | b;
+}
+
+struct Properties {
+#define DXIL_PROPERTY(Name) bool Name = false;
+#include "DXILOperation.inc"
+};
+
 } // namespace dxil
 } // namespace llvm
 
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
index 9f88ccd7a7b7d..badd5aabd6432 100644
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
@@ -52,11 +52,6 @@ struct OpStage {
   uint32_t ValidStages;
 };
 
-struct OpAttribute {
-  Version DXILVersion;
-  uint32_t ValidAttrs;
-};
-
 static const char *getOverloadTypeName(OverloadKind Kind) {
   switch (Kind) {
   case OverloadKind::HALF:
@@ -158,7 +153,6 @@ struct OpCodeProperty {
   unsigned OpCodeClassNameOffset;
   llvm::SmallVector<OpOverload> Overloads;
   llvm::SmallVector<OpStage> Stages;
-  llvm::SmallVector<OpAttribute> Attributes;
   int OverloadParamIndex; // parameter index which control the overload.
                           // When < 0, should be only 1 overload type.
 };
@@ -371,6 +365,61 @@ static std::optional<size_t> getPropIndex(ArrayRef<T> PropList,
   return std::nullopt;
 }
 
+// Pack an OpCode (bits 63..32) and a major/minor version (bits 31..16 and
+// 15..0) into a single uint64_t that can be used as a switch case label
+constexpr static uint64_t computeSwitchEnum(dxil::OpCode OpCode,
+                                            uint16_t VersionMajor,
+                                            uint16_t VersionMinor) {
+  uint64_t OpCodePack = (uint64_t)OpCode;
+  return (OpCodePack << 32) | ((uint64_t)VersionMajor << 16) | VersionMinor;
+}
+
+// Retrieve the union of all attributes declared for a DXIL OpCode at any
+// version up to and including the targeted DXILVersion
+static dxil::Attributes getDXILAttributes(dxil::OpCode OpCode,
+                                          VersionTuple DXILVersion) {
+  // Instantiate all versions to iterate through
+  SmallVector<Version> Versions = {
+#define DXIL_VERSION(MAJOR, MINOR) {MAJOR, MINOR},
+#include "DXILOperation.inc"
+  };
+
+  dxil::Attributes Attributes;
+  for (auto Version : Versions) {
+    if (DXILVersion < VersionTuple(Version.Major, Version.Minor))
+      continue;
+
+    // Switch through and match an OpCode with the specific version and set the
+    // corresponding flag(s) if available
+    switch (computeSwitchEnum(OpCode, Version.Major, Version.Minor)) {
+#define DXIL_OP_ATTRIBUTES(OpCode, VersionMajor, VersionMinor, ...)            \
+  case computeSwitchEnum(OpCode, VersionMajor, VersionMinor): {                \
+    auto Other = dxil::Attributes{__VA_ARGS__};                                \
+    Attributes |= Other;                                                       \
+    break;                                                                     \
+  };
+#include "DXILOperation.inc"
+    }
+  }
+  return Attributes;
+}
+
+// Retrieve the DXIL attributes of OpCode for DXILVersion and set the matching
+// LLVM function attribute on the call instruction CI for each one present
+static void setDXILAttributes(CallInst *CI, dxil::OpCode OpCode,
+                              VersionTuple DXILVersion) {
+  dxil::Attributes Attributes = getDXILAttributes(OpCode, DXILVersion);
+  if (Attributes.ReadNone)
+    CI->setDoesNotAccessMemory();
+  if (Attributes.ReadOnly)
+    CI->setOnlyReadsMemory();
+  if (Attributes.NoReturn)
+    CI->setDoesNotReturn();
+  if (Attributes.NoDuplicate)
+    CI->setCannotDuplicate();
+  return;
+}
+
 namespace llvm {
 namespace dxil {
 
@@ -465,7 +514,13 @@ Expected<CallInst *> DXILOpBuilder::tryCreateOp(dxil::OpCode OpCode,
   OpArgs.push_back(IRB.getInt32(llvm::to_underlying(OpCode)));
   OpArgs.append(Args.begin(), Args.end());
 
-  return IRB.CreateCall(DXILFn, OpArgs, Name);
+  // Create the function call instruction
+  CallInst *CI = IRB.CreateCall(DXILFn, OpArgs, Name);
+
+  // We then need to attach available function attributes
+  setDXILAttributes(CI, OpCode, DXILVersion);
+
+  return CI;
 }
 
 CallInst *DXILOpBuilder::createOp(dxil::OpCode OpCode, ArrayRef<Value *> Args,
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 366ccb0e00fa8..f30c45e820612 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -959,7 +959,8 @@ void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                            Register SrcReg, bool isKill, int FI,
                                            const TargetRegisterClass *RC,
                                            const TargetRegisterInfo *TRI,
-                                           Register VReg) const {
+                                           Register VReg,
+                                           MachineInstr::MIFlag Flags) const {
   DebugLoc DL = MBB.findDebugLoc(I);
   MachineFunction &MF = *MBB.getParent();
   MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -1002,12 +1003,10 @@ void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
   }
 }
 
-void HexagonInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
-                                            MachineBasicBlock::iterator I,
-                                            Register DestReg, int FI,
-                                            const TargetRegisterClass *RC,
-                                            const TargetRegisterInfo *TRI,
-                                            Register VReg) const {
+void HexagonInstrInfo::loadRegFromStackSlot(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg,
+    int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+    Register VReg, MachineInstr::MIFlag Flags) const {
   DebugLoc DL = MBB.findDebugLoc(I);
   MachineFunction &MF = *MBB.getParent();
   MachineFrameInfo &MFI = MF.getFrameInfo();
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index 854c3694ceba7..6bfb6d42095ba 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -181,21 +181,20 @@ class HexagonInstrInfo : public HexagonGenInstrInfo {
   /// stack frame index. The store instruction is to be added to the given
   /// machine basic block before the specified machine instruction. If isKill
   /// is true, the register operand is the last use and must be marked kill.
-  void storeRegToStackSlot(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator MBBI, Register SrcReg,
-                           bool isKill, int FrameIndex,
-                           const TargetRegisterClass *RC,
-                           const TargetRegisterInfo *TRI,
-                           Register VReg) const override;
+  void storeRegToStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
+      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
   /// Load the specified register of the given register class from the specified
   /// stack frame index. The load instruction is to be added to the given
   /// machine basic block before the specified machine instruction.
-  void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MBBI, Register DestReg,
-                            int FrameIndex, const TargetRegisterClass *RC,
-                            const TargetRegisterInfo *TRI,
-                            Register VReg) const override;
+  void loadRegFromStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+      Register DestReg, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
   /// This function is called for all pseudo instructions
   /// that remain after register allocation. Many pseudo instructions are
diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
index cd304d1a0a189..bd13c79ca79d6 100644
--- a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
+++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
@@ -48,7 +48,8 @@ void LanaiInstrInfo::storeRegToStackSlot(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator Position,
     Register SourceRegister, bool IsKill, int FrameIndex,
     const TargetRegisterClass *RegisterClass,
-    const TargetRegisterInfo * /*RegisterInfo*/, Register /*VReg*/) const {
+    const TargetRegisterInfo * /*RegisterInfo*/, Register /*VReg*/,
+    MachineInstr::MIFlag /*Flags*/) const {
   DebugLoc DL;
   if (Position != MBB.end()) {
     DL = Position->getDebugLoc();
@@ -68,7 +69,8 @@ void LanaiInstrInfo::loadRegFromStackSlot(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator Position,
     Register DestinationRegister, int FrameIndex,
     const TargetRegisterClass *RegisterClass,
-    const TargetRegisterInfo * /*RegisterInfo*/, Register /*VReg*/) const {
+    const TargetRegisterInfo * /*RegisterInfo*/, Register /*VReg*/,
+    MachineInstr::MIFlag /*Flags*/) const {
   DebugLoc DL;
   if (Position != MBB.end()) {
     DL = Position->getDebugLoc();
diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.h b/llvm/lib/Target/Lanai/LanaiInstrInfo.h
index 2630464f0a76f..13cf2b3571039 100644
--- a/llvm/lib/Target/Lanai/LanaiInstrInfo.h
+++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.h
@@ -53,19 +53,19 @@ class LanaiInstrInfo : public LanaiGenInstrInfo {
                    bool RenamableDest = false,
                    bool RenamableSrc = false) const override;
 
-  void storeRegToStackSlot(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator Position,
-                           Register SourceRegister, bool IsKill, int FrameIndex,
-                           const TargetRegisterClass *RegisterClass,
-                           const TargetRegisterInfo *RegisterInfo,
-                           Register VReg) const override;
-
-  void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator Position,
-                            Register DestinationRegister, int FrameIndex,
-                            const TargetRegisterClass *RegisterClass,
-                            const TargetRegisterInfo *RegisterInfo,
-                            Register VReg) const override;
+  void storeRegToStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator Position,
+      Register SourceRegister, bool IsKill, int FrameIndex,
+      const TargetRegisterClass *RegisterClass,
+      const TargetRegisterInfo *RegisterInfo, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
+
+  void loadRegFromStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator Position,
+      Register DestinationRegister, int FrameIndex,
+      const TargetRegisterClass *RegisterClass,
+      const TargetRegisterInfo *RegisterInfo, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
   bool expandPostRAPseudo(MachineInstr &MI) const override;
 
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index f9f1b097623e0..9eb607c69a952 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -271,6 +271,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
       setCondCodeAction(
           {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
           Expand);
+      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
     }
     for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
       setOperationAction(ISD::BITREVERSE, VT, Custom);
@@ -289,6 +290,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
       setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
                          ISD::SETUGE, ISD::SETUGT},
                         VT, Expand);
+      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
     }
     setOperationAction(ISD::CTPOP, GRLenVT, Legal);
     setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
@@ -327,6 +329,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
       setCondCodeAction(
           {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
           Expand);
+      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
     }
     for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
       setOperationAction(ISD::BITREVERSE, VT, Custom);
@@ -345,6 +348,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
       setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
                          ISD::SETUGE, ISD::SETUGT},
                         VT, Expand);
+      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
     }
   }
 
@@ -448,10 +452,25 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
     return lowerVECTOR_SHUFFLE(Op, DAG);
   case ISD::BITREVERSE:
     return lowerBITREVERSE(Op, DAG);
+  case ISD::SCALAR_TO_VECTOR:
+    return lowerSCALAR_TO_VECTOR(Op, DAG);
   }
   return SDValue();
 }
 
+SDValue
+LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  // Express scalar_to_vector as inserting the scalar operand at index 0 of
+  // an undef vector of the result type.
+  SDLoc Loc(Op);
+  MVT VecTy = Op.getSimpleValueType();
+  SDValue Undef = DAG.getUNDEF(VecTy);
+  SDValue LaneZero = DAG.getConstant(0, Loc, Subtarget.getGRLenVT());
+  return DAG.getNode(ISD::INSERT_VECTOR_ELT, Loc, VecTy, Undef,
+                     Op.getOperand(0), LaneZero);
+}
+
 SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
                                                  SelectionDAG &DAG) const {
   EVT ResTy = Op->getValueType(0);
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index e1bab9ebdd3f0..a215ab523874b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -336,6 +336,7 @@ class LoongArchTargetLowering : public TargetLowering {
   SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerBITREVERSE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
 
   bool isFPImmLegal(const APFloat &Imm, EVT VT,
                     bool ForCodeSize) const override;
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
index 4e49ba6e339a6..903ee76fbc8d5 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -113,7 +113,8 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
 void LoongArchInstrInfo::storeRegToStackSlot(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg,
     bool IsKill, int FI, const TargetRegisterClass *RC,
-    const TargetRegisterInfo *TRI, Register VReg) const {
+    const TargetRegisterInfo *TRI, Register VReg,
+    MachineInstr::MIFlag Flags) const {
   MachineFunction *MF = MBB.getParent();
   MachineFrameInfo &MFI = MF->getFrameInfo();
 
@@ -146,12 +147,10 @@ void LoongArchInstrInfo::storeRegToStackSlot(
       .addMemOperand(MMO);
 }
 
-void LoongArchInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
-                                              MachineBasicBlock::iterator I,
-                                              Register DstReg, int FI,
-                                              const TargetRegisterClass *RC,
-                                              const TargetRegisterInfo *TRI,
-                                              Register VReg) const {
+void LoongArchInstrInfo::loadRegFromStackSlot(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DstReg,
+    int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+    Register VReg, MachineInstr::MIFlag Flags) const {
   MachineFunction *MF = MBB.getParent();
   MachineFrameInfo &MFI = MF->getFrameInfo();
   DebugLoc DL;
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
index a5b31878bfa1c..08dc54dbfe98d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
@@ -34,17 +34,16 @@ class LoongArchInstrInfo : public LoongArchGenInstrInfo {
                    bool KillSrc, bool RenamableDest = false,
                    bool RenamableSrc = false) const override;
 
-  void storeRegToStackSlot(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator MBBI, Register SrcReg,
-                           bool IsKill, int FrameIndex,
-                           const TargetRegisterClass *RC,
-                           const TargetRegisterInfo *TRI,
-                           Register VReg) const override;
-  void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MBBI, Register DstReg,
-                            int FrameIndex, const TargetRegisterClass *RC,
-                            const TargetRegisterInfo *TRI,
-                            Register VReg) const override;
+  void storeRegToStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
+      bool IsKill, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
+  void loadRegFromStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DstReg,
+      int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
   // Materializes the given integer Val into DstReg.
   void movImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 49ae440073f2e..24b5ed5a9344f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1562,6 +1562,12 @@ def : Pat<(vector_insert v8f32:$vd, FPR32:$fj, uimm3:$imm),
 def : Pat<(vector_insert v4f64:$vd, FPR64:$fj, uimm2:$imm),
           (XVINSGR2VR_D $vd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm2:$imm)>;
 
+// scalar_to_vector: wrap an FPR32/FPR64 value as a vector via SUBREG_TO_REG.
+def : Pat<(v8f32 (scalar_to_vector FPR32:$fj)),
+          (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32)>;
+def : Pat<(v4f64 (scalar_to_vector FPR64:$fj)),
+          (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64)>;
+
 // XVPICKVE2GR_W[U]
 def : Pat<(loongarch_vpick_sext_elt v8i32:$xd, uimm3:$imm, i32),
           (XVPICKVE2GR_W v8i32:$xd, uimm3:$imm)>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index ced430216b2fe..d2063a8aaae9b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -1719,6 +1719,12 @@ def : Pat<(vector_insert v4f32:$vd, FPR32:$fj, uimm2:$imm),
 def : Pat<(vector_insert v2f64:$vd, FPR64:$fj, uimm1:$imm),
           (VINSGR2VR_D $vd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm1:$imm)>;
 
+// scalar_to_vector: wrap an FPR32/FPR64 value as a vector via SUBREG_TO_REG.
+def : Pat<(v4f32 (scalar_to_vector FPR32:$fj)),
+          (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32)>;
+def : Pat<(v2f64 (scalar_to_vector FPR64:$fj)),
+          (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64)>;
+
 // VPICKVE2GR_{B/H/W}[U]
 def : Pat<(loongarch_vpick_sext_elt v16i8:$vd, uimm4:$imm, i8),
           (VPICKVE2GR_B v16i8:$vd, uimm4:$imm)>;
diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.cpp b/llvm/lib/Target/M68k/M68kInstrInfo.cpp
index 6340464520950..182582642c50e 100644
--- a/llvm/lib/Target/M68k/M68kInstrInfo.cpp
+++ b/llvm/lib/Target/M68k/M68kInstrInfo.cpp
@@ -844,7 +844,8 @@ bool M68kInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
 void M68kInstrInfo::storeRegToStackSlot(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
     bool IsKill, int FrameIndex, const TargetRegisterClass *RC,
-    const TargetRegisterInfo *TRI, Register VReg) const {
+    const TargetRegisterInfo *TRI, Register VReg,
+    MachineInstr::MIFlag Flags) const {
   const MachineFrameInfo &MFI = MBB.getParent()->getFrameInfo();
   assert(MFI.getObjectSize(FrameIndex) >= TRI->getSpillSize(*RC) &&
          "Stack slot is too small to store");
@@ -862,7 +863,8 @@ void M68kInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                          Register DstReg, int FrameIndex,
                                          const TargetRegisterClass *RC,
                                          const TargetRegisterInfo *TRI,
-                                         Register VReg) const {
+                                         Register VReg,
+                                         MachineInstr::MIFlag Flags) const {
   const MachineFrameInfo &MFI = MBB.getParent()->getFrameInfo();
   assert(MFI.getObjectSize(FrameIndex) >= TRI->getSpillSize(*RC) &&
          "Stack slot is too small to load");
diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.h b/llvm/lib/Target/M68k/M68kInstrInfo.h
index 5d81956d89fdf..b72fd7a53b93d 100644
--- a/llvm/lib/Target/M68k/M68kInstrInfo.h
+++ b/llvm/lib/Target/M68k/M68kInstrInfo.h
@@ -278,18 +278,17 @@ class M68kInstrInfo : public M68kGenInstrInfo {
                          unsigned &Size, unsigned &Offset,
                          const MachineFunction &MF) const override;
 
-  void storeRegToStackSlot(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator MI, Register SrcReg,
-                           bool IsKill, int FrameIndex,
-                           const TargetRegisterClass *RC,
-                           const TargetRegisterInfo *TRI,
-                           Register VReg) const override;
-
-  void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MI, Register DestReg,
-                            int FrameIndex, const TargetRegisterClass *RC,
-                            const TargetRegisterInfo *TRI,
-                            Register VReg) const override;
+  void storeRegToStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
+      bool IsKill, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
+
+  void loadRegFromStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
+      int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
   bool expandPostRAPseudo(MachineInstr &MI) const override;
 
diff --git a/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
index 503939a880b83..3018d97f66b86 100644
--- a/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -31,7 +31,8 @@ MSP430InstrInfo::MSP430InstrInfo(MSP430Subtarget &STI)
 void MSP430InstrInfo::storeRegToStackSlot(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
     bool isKill, int FrameIdx, const TargetRegisterClass *RC,
-    const TargetRegisterInfo *TRI, Register VReg) const {
+    const TargetRegisterInfo *TRI, Register VReg,
+    MachineInstr::MIFlag Flags) const {
   DebugLoc DL;
   if (MI != MBB.end()) DL = MI->getDebugLoc();
   MachineFunction &MF = *MBB.getParent();
@@ -54,12 +55,10 @@ void MSP430InstrInfo::storeRegToStackSlot(
     llvm_unreachable("Cannot store this register to stack slot!");
 }
 
-void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
-                                           MachineBasicBlock::iterator MI,
-                                           Register DestReg, int FrameIdx,
-                                           const TargetRegisterClass *RC,
-                                           const TargetRegisterInfo *TRI,
-                                           Register VReg) const {
+void MSP430InstrInfo::loadRegFromStackSlot(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
+    int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+    Register VReg, MachineInstr::MIFlag Flags) const {
   DebugLoc DL;
   if (MI != MBB.end()) DL = MI->getDebugLoc();
   MachineFunction &MF = *MBB.getParent();
diff --git a/llvm/lib/Target/MSP430/MSP430InstrInfo.h b/llvm/lib/Target/MSP430/MSP430InstrInfo.h
index 113a22318bec5..71395557454e8 100644
--- a/llvm/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/llvm/lib/Target/MSP430/MSP430InstrInfo.h
@@ -40,17 +40,16 @@ class MSP430InstrInfo : public MSP430GenInstrInfo {
                    bool KillSrc, bool RenamableDest = false,
                    bool RenamableSrc = false) const override;
 
-  void storeRegToStackSlot(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator MI, Register SrcReg,
-                           bool isKill, int FrameIndex,
-                           const TargetRegisterClass *RC,
-                           const TargetRegisterInfo *TRI,
-                           Register VReg) const override;
-  void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MI, Register DestReg,
-                            int FrameIdx, const TargetRegisterClass *RC,
-                            const TargetRegisterInfo *TRI,
-                            Register VReg) const override;
+  void storeRegToStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
+      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
+  void loadRegFromStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
+      int FrameIdx, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
   unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
 
diff --git a/llvm/lib/Target/Mips/Mips16InstrInfo.cpp b/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
index c1f5c2fb84e8b..351caf55062e3 100644
--- a/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -106,7 +106,8 @@ void Mips16InstrInfo::storeRegToStack(MachineBasicBlock &MBB,
                                       Register SrcReg, bool isKill, int FI,
                                       const TargetRegisterClass *RC,
                                       const TargetRegisterInfo *TRI,
-                                      int64_t Offset) const {
+                                      int64_t Offset,
+                                      MachineInstr::MIFlag Flags) const {
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
   MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore);
@@ -119,12 +120,10 @@ void Mips16InstrInfo::storeRegToStack(MachineBasicBlock &MBB,
       .addMemOperand(MMO);
 }
 
-void Mips16InstrInfo::loadRegFromStack(MachineBasicBlock &MBB,
-                                       MachineBasicBlock::iterator I,
-                                       Register DestReg, int FI,
-                                       const TargetRegisterClass *RC,
-                                       const TargetRegisterInfo *TRI,
-                                       int64_t Offset) const {
+void Mips16InstrInfo::loadRegFromStack(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg,
+    int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+    int64_t Offset, MachineInstr::MIFlag Flags) const {
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
   MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad);
diff --git a/llvm/lib/Target/Mips/Mips16InstrInfo.h b/llvm/lib/Target/Mips/Mips16InstrInfo.h
index 8e73c8079b0f8..095a1b2239a36 100644
--- a/llvm/lib/Target/Mips/Mips16InstrInfo.h
+++ b/llvm/lib/Target/Mips/Mips16InstrInfo.h
@@ -53,19 +53,17 @@ class Mips16InstrInfo : public MipsInstrInfo {
                    bool KillSrc, bool RenamableDest = false,
                    bool RenamableSrc = false) const override;
 
-  void storeRegToStack(MachineBasicBlock &MBB,
-                       MachineBasicBlock::iterator MBBI,
-                       Register SrcReg, bool isKill, int FrameIndex,
-                       const TargetRegisterClass *RC,
-                       const TargetRegisterInfo *TRI,
-                       int64_t Offset) const override;
-
-  void loadRegFromStack(MachineBasicBlock &MBB,
-                        MachineBasicBlock::iterator MBBI,
-                        Register DestReg, int FrameIndex,
-                        const TargetRegisterClass *RC,
-                        const TargetRegisterInfo *TRI,
-                        int64_t Offset) const override;
+  void storeRegToStack(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
+      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, int64_t Offset,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
+
+  void loadRegFromStack(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+      Register DestReg, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, int64_t Offset,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
   bool expandPostRAPseudo(MachineInstr &MI) const override;
 
diff --git a/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
index 3a9421fae0f60..258010d331181 100644
--- a/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -402,7 +402,7 @@ void RegDefsUses::addLiveOut(const MachineBasicBlock &MBB,
   for (const MachineBasicBlock *S : MBB.successors())
     if (S != &SuccBB)
       for (const auto &LI : S->liveins())
-        Uses.set(LI.PhysReg);
+        Uses.set(LI.PhysReg.id());
 }
 
 bool RegDefsUses::update(const MachineInstr &MI, unsigned Begin, unsigned End) {
diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.h b/llvm/lib/Target/Mips/MipsInstrInfo.h
index 2ff12f80b1714..0fa8257089bc5 100644
--- a/llvm/lib/Target/Mips/MipsInstrInfo.h
+++ b/llvm/lib/Target/Mips/MipsInstrInfo.h
@@ -137,36 +137,34 @@ class MipsInstrInfo : public MipsGenInstrInfo {
   /// Return the number of bytes of code the specified instruction may be.
   unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
 
-  void storeRegToStackSlot(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator MBBI, Register SrcReg,
-                           bool isKill, int FrameIndex,
-                           const TargetRegisterClass *RC,
-                           const TargetRegisterInfo *TRI,
-                           Register VReg) const override {
-    storeRegToStack(MBB, MBBI, SrcReg, isKill, FrameIndex, RC, TRI, 0);
+  void storeRegToStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
+      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override {
+    storeRegToStack(MBB, MBBI, SrcReg, isKill, FrameIndex, RC, TRI, 0, Flags);
   }
 
-  void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MBBI, Register DestReg,
-                            int FrameIndex, const TargetRegisterClass *RC,
-                            const TargetRegisterInfo *TRI,
-                            Register VReg) const override {
-    loadRegFromStack(MBB, MBBI, DestReg, FrameIndex, RC, TRI, 0);
+  void loadRegFromStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+      Register DestReg, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override {
+    loadRegFromStack(MBB, MBBI, DestReg, FrameIndex, RC, TRI, 0, Flags);
   }
 
-  virtual void storeRegToStack(MachineBasicBlock &MBB,
-                               MachineBasicBlock::iterator MI,
-                               Register SrcReg, bool isKill, int FrameIndex,
-                               const TargetRegisterClass *RC,
-                               const TargetRegisterInfo *TRI,
-                               int64_t Offset) const = 0;
-
-  virtual void loadRegFromStack(MachineBasicBlock &MBB,
-                                MachineBasicBlock::iterator MI,
-                                Register DestReg, int FrameIndex,
-                                const TargetRegisterClass *RC,
-                                const TargetRegisterInfo *TRI,
-                                int64_t Offset) const = 0;
+  virtual void
+  storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+                  Register SrcReg, bool isKill, int FrameIndex,
+                  const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+                  int64_t Offset,
+                  MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const = 0;
+
+  virtual void loadRegFromStack(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
+      int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, int64_t Offset,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const = 0;
 
   virtual void adjustStackPtr(unsigned SP, int64_t Amount,
                               MachineBasicBlock &MBB,
diff --git a/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
index 851fd2f3ead7d..c245a18ae0bef 100644
--- a/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -209,11 +209,13 @@ MipsSEInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
   return std::nullopt;
 }
 
-void MipsSEInstrInfo::
-storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
-                Register SrcReg, bool isKill, int FI,
-                const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
-                int64_t Offset) const {
+void MipsSEInstrInfo::storeRegToStack(MachineBasicBlock &MBB,
+                                      MachineBasicBlock::iterator I,
+                                      Register SrcReg, bool isKill, int FI,
+                                      const TargetRegisterClass *RC,
+                                      const TargetRegisterInfo *TRI,
+                                      int64_t Offset,
+                                      MachineInstr::MIFlag Flags) const {
   DebugLoc DL;
   MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore);
 
@@ -283,10 +285,10 @@ storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     .addFrameIndex(FI).addImm(Offset).addMemOperand(MMO);
 }
 
-void MipsSEInstrInfo::
-loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
-                 Register DestReg, int FI, const TargetRegisterClass *RC,
-                 const TargetRegisterInfo *TRI, int64_t Offset) const {
+void MipsSEInstrInfo::loadRegFromStack(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg,
+    int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+    int64_t Offset, MachineInstr::MIFlag Flags) const {
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
   MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad);
diff --git a/llvm/lib/Target/Mips/MipsSEInstrInfo.h b/llvm/lib/Target/Mips/MipsSEInstrInfo.h
index 36bddba10410c..9004254857f32 100644
--- a/llvm/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/llvm/lib/Target/Mips/MipsSEInstrInfo.h
@@ -47,19 +47,17 @@ class MipsSEInstrInfo : public MipsInstrInfo {
                    bool KillSrc, bool RenamableDest = false,
                    bool RenamableSrc = false) const override;
 
-  void storeRegToStack(MachineBasicBlock &MBB,
-                       MachineBasicBlock::iterator MI,
-                       Register SrcReg, bool isKill, int FrameIndex,
-                       const TargetRegisterClass *RC,
-                       const TargetRegisterInfo *TRI,
-                       int64_t Offset) const override;
-
-  void loadRegFromStack(MachineBasicBlock &MBB,
-                        MachineBasicBlock::iterator MI,
-                        Register DestReg, int FrameIndex,
-                        const TargetRegisterClass *RC,
-                        const TargetRegisterInfo *TRI,
-                        int64_t Offset) const override;
+  void storeRegToStack(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
+      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, int64_t Offset,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
+
+  void loadRegFromStack(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
+      int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, int64_t Offset,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
   bool expandPostRAPseudo(MachineInstr &MI) const override;
 
diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td
index 9af8715ef52ae..3ca8b4d294079 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.td
+++ b/llvm/lib/Target/NVPTX/NVPTX.td
@@ -39,6 +39,7 @@ foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53,
   def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>;
 
 def SM90a: FeatureSM<"90a", 901>;
+def SM100a: FeatureSM<"100a", 1001>;
 
 foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65,
                    70, 71, 72, 73, 74, 75, 76, 77, 78,
@@ -74,6 +75,7 @@ def : Proc<"sm_89", [SM89, PTX78]>;
 def : Proc<"sm_90", [SM90, PTX78]>;
 def : Proc<"sm_90a", [SM90a, PTX80]>;
 def : Proc<"sm_100", [SM100, PTX86]>;
+def : Proc<"sm_100a", [SM100a, PTX86]>;
 
 def NVPTXInstrInfo : InstrInfo {
 }
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
index a1d9f01712018..06b111c69fb74 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -38,17 +38,16 @@ class NVPTXInstrInfo : public NVPTXGenInstrInfo {
    *                                  int &FrameIndex) const;
    * virtual Register isStoreToStackSlot(const MachineInstr *MI,
    *                                 int &FrameIndex) const;
-   * virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
-   *                              MachineBasicBlock::iterator MBBI,
-   *                             unsigned SrcReg, bool isKill, int FrameIndex,
-   *                              const TargetRegisterClass *RC,
-   *                              Register VReg) const;
-   * virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
-   *                               MachineBasicBlock::iterator MBBI,
-   *                               unsigned DestReg, int FrameIndex,
-   *                               const TargetRegisterClass *RC,
-   *                               const TargetRegisterInfo *TRI,
-   *                               Register VReg) const;
+   * virtual void storeRegToStackSlot(
+   *    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+   *    unsigned SrcReg, bool isKill, int FrameIndex,
+   *    const TargetRegisterClass *RC, Register VReg,
+   *    MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const;
+   * virtual void loadRegFromStackSlot(
+   *    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+   *    unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC,
+   *    const TargetRegisterInfo *TRI, Register VReg,
+   *    MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const;
    */
 
   void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
diff --git a/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/llvm/lib/Target/NVPTX/NVVMReflect.cpp
index 0cd584c407444..20b8bef1899b4 100644
--- a/llvm/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -47,7 +47,9 @@ using namespace llvm;
 
 #define DEBUG_TYPE "nvptx-reflect"
 
-namespace llvm { void initializeNVVMReflectPass(PassRegistry &); }
+namespace llvm {
+void initializeNVVMReflectPass(PassRegistry &);
+}
 
 namespace {
 class NVVMReflect : public FunctionPass {
@@ -61,15 +63,15 @@ class NVVMReflect : public FunctionPass {
 
   bool runOnFunction(Function &) override;
 };
-}
+} // namespace
 
 FunctionPass *llvm::createNVVMReflectPass(unsigned int SmVersion) {
   return new NVVMReflect(SmVersion);
 }
 
 static cl::opt<bool>
-NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true), cl::Hidden,
-                   cl::desc("NVVM reflection, enabled by default"));
+    NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true), cl::Hidden,
+                       cl::desc("NVVM reflection, enabled by default"));
 
 char NVVMReflect::ID = 0;
 INITIALIZE_PASS(NVVMReflect, "nvvm-reflect",
@@ -189,8 +191,7 @@ static bool runNVVMReflect(Function &F, unsigned SmVersion) {
   // until we find a terminator that we can then remove.
   while (!ToSimplify.empty()) {
     Instruction *I = ToSimplify.pop_back_val();
-    if (Constant *C =
-            ConstantFoldInstruction(I, F.getDataLayout())) {
+    if (Constant *C = ConstantFoldInstruction(I, F.getDataLayout())) {
       for (User *U : I->users())
         if (Instruction *I = dyn_cast<Instruction>(U))
           ToSimplify.push_back(I);
@@ -220,13 +221,7 @@ bool NVVMReflect::runOnFunction(Function &F) {
   return runNVVMReflect(F, SmVersion);
 }
 
-NVVMReflectPass::NVVMReflectPass() {
-  // Get the CPU string from the command line if not provided.
-  std::string MCPU = codegen::getMCPU();
-  StringRef SM = MCPU;
-  if (!SM.consume_front("sm_") || SM.consumeInteger(10, SmVersion))
-    SmVersion = 0;
-}
+NVVMReflectPass::NVVMReflectPass() : NVVMReflectPass(0) {}
 
 PreservedAnalyses NVVMReflectPass::run(Function &F,
                                        FunctionAnalysisManager &AM) {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index fa45a7fb7fabe..04b58bba7251e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1967,7 +1967,8 @@ void PPCInstrInfo::storeRegToStackSlotNoUpd(
 void PPCInstrInfo::storeRegToStackSlot(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
     bool isKill, int FrameIdx, const TargetRegisterClass *RC,
-    const TargetRegisterInfo *TRI, Register VReg) const {
+    const TargetRegisterInfo *TRI, Register VReg,
+    MachineInstr::MIFlag Flags) const {
   // We need to avoid a situation in which the value from a VRRC register is
   // spilled using an Altivec instruction and reloaded into a VSRC register
   // using a VSX instruction. The issue with this is that the VSX
@@ -2011,12 +2012,10 @@ void PPCInstrInfo::loadRegFromStackSlotNoUpd(
   NewMIs.back()->addMemOperand(MF, MMO);
 }
 
-void PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
-                                        MachineBasicBlock::iterator MI,
-                                        Register DestReg, int FrameIdx,
-                                        const TargetRegisterClass *RC,
-                                        const TargetRegisterInfo *TRI,
-                                        Register VReg) const {
+void PPCInstrInfo::loadRegFromStackSlot(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
+    int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+    Register VReg, MachineInstr::MIFlag Flags) const {
   // We need to avoid a situation in which the value from a VRRC register is
   // spilled using an Altivec instruction and reloaded into a VSRC register
   // using a VSX instruction. The issue with this is that the VSX
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index cd8ecc2dcfac8..69279efe5c151 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -461,12 +461,11 @@ class PPCInstrInfo : public PPCGenInstrInfo {
                    bool KillSrc, bool RenamableDest = false,
                    bool RenamableSrc = false) const override;
 
-  void storeRegToStackSlot(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator MBBI, Register SrcReg,
-                           bool isKill, int FrameIndex,
-                           const TargetRegisterClass *RC,
-                           const TargetRegisterInfo *TRI,
-                           Register VReg) const override;
+  void storeRegToStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
+      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
   // Emits a register spill without updating the register class for vector
   // registers. This ensures that when we spill a vector register the
@@ -477,11 +476,11 @@ class PPCInstrInfo : public PPCGenInstrInfo {
                                 const TargetRegisterClass *RC,
                                 const TargetRegisterInfo *TRI) const;
 
-  void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MBBI, Register DestReg,
-                            int FrameIndex, const TargetRegisterClass *RC,
-                            const TargetRegisterInfo *TRI,
-                            Register VReg) const override;
+  void loadRegFromStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+      Register DestReg, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
   // Emits a register reload without updating the register class for vector
   // registers. This ensures that when we reload a vector register the
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 36292e3d572cb..9855028ead9e2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3224,8 +3224,25 @@ bool RISCVDAGToDAGISel::selectInvLogicImm(SDValue N, SDValue &Val) {
 
   // Abandon this transform if the constant is needed elsewhere.
   for (const SDNode *U : N->users()) {
-    if (!ISD::isBitwiseLogicOp(U->getOpcode()))
+    switch (U->getOpcode()) {
+    case ISD::AND:
+    case ISD::OR:
+    case ISD::XOR:
+      if (!(Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb()))
+        return false;
+      break;
+    case RISCVISD::VMV_V_X_VL:
+      if (!Subtarget->hasStdExtZvkb())
+        return false;
+      if (!all_of(U->users(), [](const SDNode *V) {
+            return V->getOpcode() == ISD::AND ||
+                   V->getOpcode() == RISCVISD::AND_VL;
+          }))
+        return false;
+      break;
+    default:
       return false;
+    }
   }
 
   // For 64-bit constants, the instruction sequences get complex,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d1a5a76029145..f7efd5f437fbb 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3517,41 +3517,54 @@ static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
                                   const RISCVSubtarget &Subtarget) {
   if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
     return SDValue();
-  SDValue Vec = SplatVal.getOperand(0);
+  SDValue Src = SplatVal.getOperand(0);
   // Don't perform this optimization for i1 vectors, or if the element types are
   // different
   // FIXME: Support i1 vectors, maybe by promoting to i8?
   MVT EltTy = VT.getVectorElementType();
-  if (EltTy == MVT::i1 ||
-      EltTy != Vec.getSimpleValueType().getVectorElementType())
+  MVT SrcVT = Src.getSimpleValueType();
+  if (EltTy == MVT::i1 || EltTy != SrcVT.getVectorElementType())
     return SDValue();
   SDValue Idx = SplatVal.getOperand(1);
   // The index must be a legal type.
   if (Idx.getValueType() != Subtarget.getXLenVT())
     return SDValue();
 
-  // Check that Index lies within VT
-  // TODO: Can we check if the Index is constant and known in-bounds?
-  if (!TypeSize::isKnownLE(Vec.getValueSizeInBits(), VT.getSizeInBits()))
-    return SDValue();
+  // Check that we know Idx lies within VT
+  if (!TypeSize::isKnownLE(SrcVT.getSizeInBits(), VT.getSizeInBits())) {
+    auto *CIdx = dyn_cast<ConstantSDNode>(Idx);
+    if (!CIdx || CIdx->getZExtValue() >= VT.getVectorMinNumElements())
+      return SDValue();
+  }
 
+  // Convert fixed length vectors to scalable
   MVT ContainerVT = VT;
   if (VT.isFixedLengthVector())
     ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
 
-  Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
-                    DAG.getUNDEF(ContainerVT), Vec,
-                    DAG.getVectorIdxConstant(0, DL));
+  MVT SrcContainerVT = SrcVT;
+  if (SrcVT.isFixedLengthVector()) {
+    SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
+    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
+  }
 
-  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+  // Put Src in a VT sized vector
+  if (SrcContainerVT.getVectorMinNumElements() <
+      ContainerVT.getVectorMinNumElements())
+    Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
+                      DAG.getUNDEF(ContainerVT), Src,
+                      DAG.getVectorIdxConstant(0, DL));
+  else
+    Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Src,
+                      DAG.getVectorIdxConstant(0, DL));
 
-  SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
+  // We checked that Idx fits inside VT earlier
+  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+  SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Src,
                                Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
-
-  if (!VT.isFixedLengthVector())
-    return Gather;
-
-  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
+  if (VT.isFixedLengthVector())
+    Gather = convertFromScalableVector(VT, Gather, DAG, Subtarget);
+  return Gather;
 }
 
 /// Try and optimize BUILD_VECTORs with "dominant values" - these are values
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 1f7e8d87a11b0..471cd15ee9c87 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -579,7 +579,8 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                          Register SrcReg, bool IsKill, int FI,
                                          const TargetRegisterClass *RC,
                                          const TargetRegisterInfo *TRI,
-                                         Register VReg) const {
+                                         Register VReg,
+                                         MachineInstr::MIFlag Flags) const {
   MachineFunction *MF = MBB.getParent();
   MachineFrameInfo &MFI = MF->getFrameInfo();
 
@@ -663,12 +664,10 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
   }
 }
 
-void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
-                                          MachineBasicBlock::iterator I,
-                                          Register DstReg, int FI,
-                                          const TargetRegisterClass *RC,
-                                          const TargetRegisterInfo *TRI,
-                                          Register VReg) const {
+void RISCVInstrInfo::loadRegFromStackSlot(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DstReg,
+    int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+    Register VReg, MachineInstr::MIFlag Flags) const {
   MachineFunction *MF = MBB.getParent();
   MachineFrameInfo &MFI = MF->getFrameInfo();
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 7e8bcd451a8ef..1c81719c767ec 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -92,18 +92,17 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
                    bool KillSrc, bool RenamableDest = false,
                    bool RenamableSrc = false) const override;
 
-  void storeRegToStackSlot(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator MBBI, Register SrcReg,
-                           bool IsKill, int FrameIndex,
-                           const TargetRegisterClass *RC,
-                           const TargetRegisterInfo *TRI,
-                           Register VReg) const override;
-
-  void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MBBI, Register DstReg,
-                            int FrameIndex, const TargetRegisterClass *RC,
-                            const TargetRegisterInfo *TRI,
-                            Register VReg) const override;
+  void storeRegToStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
+      bool IsKill, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
+
+  void loadRegFromStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DstReg,
+      int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
   using TargetInstrInfo::foldMemoryOperandImpl;
   MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 21eedbb626846..268bfe70673a2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -1932,6 +1932,7 @@ multiclass VPseudoUSStore {
         def "E" # eew # "_V_" # LInfo : VPseudoUSStoreNoMask<vreg, eew>,
                                         VSESched<LInfo>;
         def "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoUSStoreMask<vreg, eew>,
+                                                  RISCVMaskedPseudo<MaskIdx=2>,
                                                   VSESched<LInfo>;
       }
     }
@@ -1958,6 +1959,7 @@ multiclass VPseudoSStore {
         def "E" # eew # "_V_" # LInfo : VPseudoSStoreNoMask<vreg, eew>,
                                         VSSSched<eew, LInfo>;
         def "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSStoreMask<vreg, eew>,
+                                                  RISCVMaskedPseudo<MaskIdx=3>,
                                                   VSSSched<eew, LInfo>;
       }
     }
@@ -1984,6 +1986,7 @@ multiclass VPseudoIStore<bit Ordered> {
               VSXSched<dataEEW, Ordered, DataLInfo, IdxLInfo>;
             def "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo # "_MASK" :
               VPseudoIStoreMask<Vreg, IdxVreg, idxEEW, idxEMUL.value, Ordered>,
+              RISCVMaskedPseudo<MaskIdx=3>,
               VSXSched<dataEEW, Ordered, DataLInfo, IdxLInfo>;
           }
         }
@@ -3709,7 +3712,8 @@ multiclass VPseudoUSSegLoad {
           def nf # "E" # eew # "_V_" # LInfo :
             VPseudoUSSegLoadNoMask<vreg, eew, nf>, VLSEGSched<nf, eew, LInfo>;
           def nf # "E" # eew # "_V_" # LInfo # "_MASK" :
-            VPseudoUSSegLoadMask<vreg, eew, nf>, VLSEGSched<nf, eew, LInfo>;
+            VPseudoUSSegLoadMask<vreg, eew, nf>, RISCVMaskedPseudo<MaskIdx=2>,
+            VLSEGSched<nf, eew, LInfo>;
         }
       }
     }
@@ -3726,7 +3730,8 @@ multiclass VPseudoUSSegLoadFF {
           def nf # "E" # eew # "FF_V_" # LInfo :
             VPseudoUSSegLoadFFNoMask<vreg, eew, nf>, VLSEGFFSched<nf, eew, LInfo>;
           def nf # "E" # eew # "FF_V_" # LInfo # "_MASK" :
-            VPseudoUSSegLoadFFMask<vreg, eew, nf>, VLSEGFFSched<nf, eew, LInfo>;
+            VPseudoUSSegLoadFFMask<vreg, eew, nf>, RISCVMaskedPseudo<MaskIdx=2>,
+            VLSEGFFSched<nf, eew, LInfo>;
         }
       }
     }
@@ -3743,6 +3748,7 @@ multiclass VPseudoSSegLoad {
           def nf # "E" # eew # "_V_" # LInfo : VPseudoSSegLoadNoMask<vreg, eew, nf>,
                                                VLSSEGSched<nf, eew, LInfo>;
           def nf # "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSSegLoadMask<vreg, eew, nf>,
+                                                         RISCVMaskedPseudo<MaskIdx=3>,
                                                          VLSSEGSched<nf, eew, LInfo>;
         }
       }
@@ -3773,6 +3779,7 @@ multiclass VPseudoISegLoad<bit Ordered> {
               def nf # "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo # "_MASK" :
                 VPseudoISegLoadMask<Vreg, IdxVreg, idxEEW, idxEMUL.value,
                                     nf, Ordered>,
+                RISCVMaskedPseudo<MaskIdx=3>,
                 VLXSEGSched<nf, dataEEW, Ordered, DataLInfo>;
             }
           }
@@ -3792,6 +3799,7 @@ multiclass VPseudoUSSegStore {
           def nf # "E" # eew # "_V_" # LInfo : VPseudoUSSegStoreNoMask<vreg, eew, nf>,
                                                VSSEGSched<nf, eew, LInfo>;
           def nf # "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoUSSegStoreMask<vreg, eew, nf>,
+                                                         RISCVMaskedPseudo<MaskIdx=2>,
                                                          VSSEGSched<nf, eew, LInfo>;
         }
       }
@@ -3809,6 +3817,7 @@ multiclass VPseudoSSegStore {
           def nf # "E" # eew # "_V_" # LInfo : VPseudoSSegStoreNoMask<vreg, eew, nf>,
                                                VSSSEGSched<nf, eew, LInfo>;
           def nf # "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSSegStoreMask<vreg, eew, nf>,
+                                                         RISCVMaskedPseudo<MaskIdx=3>,
                                                          VSSSEGSched<nf, eew, LInfo>;
         }
       }
@@ -3839,6 +3848,7 @@ multiclass VPseudoISegStore<bit Ordered> {
               def nf # "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo # "_MASK" :
                 VPseudoISegStoreMask<Vreg, IdxVreg, idxEEW, idxEMUL.value,
                                      nf, Ordered>,
+                RISCVMaskedPseudo<MaskIdx=3>,
                 VSXSEGSched<nf, idxEEW, Ordered, DataLInfo>;
             }
           }
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
index c69d888517521..430d75e5cec5b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
@@ -624,6 +624,13 @@ foreach vti = AllIntegerVectors in {
                  vti.RegClass:$rs2,
                  vti.ScalarRegClass:$rs1,
                  vti.AVL, vti.Log2SEW, TA_MA)>;
+    def : Pat<(vti.Vector (and (riscv_splat_vector invLogicImm:$rs1),
+                               vti.RegClass:$rs2)),
+              (!cast<Instruction>("PseudoVANDN_VX_"#vti.LMul.MX)
+                 (vti.Vector (IMPLICIT_DEF)),
+                 vti.RegClass:$rs2,
+                 invLogicImm:$rs1,
+                 vti.AVL, vti.Log2SEW, TA_MA)>;
   }
 }
 
@@ -758,6 +765,20 @@ foreach vti = AllIntegerVectors in {
                  GPR:$vl,
                  vti.Log2SEW,
                  TAIL_AGNOSTIC)>;
+
+    def : Pat<(vti.Vector (riscv_and_vl (riscv_splat_vector invLogicImm:$rs1),
+                                        (vti.Vector vti.RegClass:$rs2),
+                                        (vti.Vector vti.RegClass:$passthru),
+                                        (vti.Mask V0),
+                                        VLOpFrag)),
+              (!cast<Instruction>("PseudoVANDN_VX_"#vti.LMul.MX#"_MASK")
+                 vti.RegClass:$passthru,
+                 vti.RegClass:$rs2,
+                 invLogicImm:$rs1,
+                 (vti.Mask V0),
+                 GPR:$vl,
+                 vti.Log2SEW,
+                 TAIL_AGNOSTIC)>;
   }
 }
 
diff --git a/llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td b/llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td
index 550f83a59b8b0..a8e8de2497988 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td
@@ -277,5 +277,4 @@ defm : UnsupportedSchedSFB;
 defm : UnsupportedSchedZabha;
 defm : UnsupportedSchedXsfvcp;
 defm : UnsupportedSchedZvk;
-defm : UnsupportedSchedZvkned;
 }
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td
index dc20fdcea4d78..4aa74b020825c 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td
@@ -105,7 +105,6 @@ def : ReadAdvance<ReadIMul32, 0>;
 //===----------------------------------------------------------------------===//
 // Unsupported extensions
 defm : UnsupportedSchedA;
-defm : UnsupportedSchedD;
 defm : UnsupportedSchedF;
 defm : UnsupportedSchedSFB;
 defm : UnsupportedSchedV;
@@ -117,7 +116,6 @@ defm : UnsupportedSchedZbs;
 defm : UnsupportedSchedZbkb;
 defm : UnsupportedSchedZbkx;
 defm : UnsupportedSchedZfa;
-defm : UnsupportedSchedZfh;
 defm : UnsupportedSchedXsfvcp;
 defm : UnsupportedSchedZvk;
 }
diff --git a/llvm/lib/Target/SPIRV/SPIRVAPI.cpp b/llvm/lib/Target/SPIRV/SPIRVAPI.cpp
index a1ee4aada853b..4c806fd7c9888 100644
--- a/llvm/lib/Target/SPIRV/SPIRVAPI.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVAPI.cpp
@@ -134,9 +134,8 @@ SPIRVTranslateModule(Module *M, std::string &SpirvObj, std::string &ErrMsg,
   TargetOptions Options;
   std::optional<Reloc::Model> RM;
   std::optional<CodeModel::Model> CM;
-  std::unique_ptr<TargetMachine> Target =
-      std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
-          TargetTriple.getTriple(), "", "", Options, RM, CM, OLevel));
+  std::unique_ptr<TargetMachine> Target(TheTarget->createTargetMachine(
+      TargetTriple.getTriple(), "", "", Options, RM, CM, OLevel));
   if (!Target) {
     ErrMsg = "Could not allocate target machine!";
     return false;
@@ -158,10 +157,10 @@ SPIRVTranslateModule(Module *M, std::string &SpirvObj, std::string &ErrMsg,
   TargetLibraryInfoImpl TLII(Triple(M->getTargetTriple()));
   legacy::PassManager PM;
   PM.add(new TargetLibraryInfoWrapperPass(TLII));
-  MachineModuleInfoWrapperPass *MMIWP =
-      new MachineModuleInfoWrapperPass(Target.get());
+  std::unique_ptr<MachineModuleInfoWrapperPass> MMIWP(
+      new MachineModuleInfoWrapperPass(Target.get()));
   const_cast<TargetLoweringObjectFile *>(Target->getObjFileLowering())
-      ->Initialize(MMIWP->getMMI().getContext(), *Target);
+      ->Initialize(MMIWP.get()->getMMI().getContext(), *Target);
 
   SmallString<4096> OutBuffer;
   raw_svector_ostream OutStream(OutBuffer);
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
index bc57a556c9ef6..225d6cd80f908 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -528,7 +528,8 @@ void SparcInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                          Register SrcReg, bool isKill, int FI,
                                          const TargetRegisterClass *RC,
                                          const TargetRegisterInfo *TRI,
-                                         Register VReg) const {
+                                         Register VReg,
+                                         MachineInstr::MIFlag Flags) const {
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
 
@@ -563,12 +564,10 @@ void SparcInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
     llvm_unreachable("Can't store this register to stack slot");
 }
 
-void SparcInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
-                                          MachineBasicBlock::iterator I,
-                                          Register DestReg, int FI,
-                                          const TargetRegisterClass *RC,
-                                          const TargetRegisterInfo *TRI,
-                                          Register VReg) const {
+void SparcInstrInfo::loadRegFromStackSlot(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg,
+    int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+    Register VReg, MachineInstr::MIFlag Flags) const {
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
 
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.h b/llvm/lib/Target/Sparc/SparcInstrInfo.h
index fc04542c819d4..552e7c52be262 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.h
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.h
@@ -90,18 +90,17 @@ class SparcInstrInfo : public SparcGenInstrInfo {
                    bool KillSrc, bool RenamableDest = false,
                    bool RenamableSrc = false) const override;
 
-  void storeRegToStackSlot(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator MBBI, Register SrcReg,
-                           bool isKill, int FrameIndex,
-                           const TargetRegisterClass *RC,
-                           const TargetRegisterInfo *TRI,
-                           Register VReg) const override;
-
-  void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MBBI, Register DestReg,
-                            int FrameIndex, const TargetRegisterClass *RC,
-                            const TargetRegisterInfo *TRI,
-                            Register VReg) const override;
+  void storeRegToStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
+      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
+
+  void loadRegFromStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+      Register DestReg, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
   Register getGlobalBaseReg(MachineFunction *MF) const;
 
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index 333221c46ebb8..f6951c39ce9be 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -15,6 +15,7 @@
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrAnalysis.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCStreamer.h"
@@ -243,6 +244,10 @@ createNullTargetStreamer(MCStreamer &S) {
   return new SystemZTargetStreamer(S);
 }
 
+static MCInstrAnalysis *createSystemZMCInstrAnalysis(const MCInstrInfo *Info) {
+  return new MCInstrAnalysis(Info);
+}
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTargetMC() {
   // Register the MCAsmInfo.
   TargetRegistry::RegisterMCAsmInfo(getTheSystemZTarget(),
@@ -283,4 +288,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTargetMC() {
   // Register the null streamer
   TargetRegistry::RegisterNullTargetStreamer(getTheSystemZTarget(),
                                              createNullTargetStreamer);
+
+  // Register the MCInstrAnalysis.
+  TargetRegistry::RegisterMCInstrAnalysis(getTheSystemZTarget(),
+                                          createSystemZMCInstrAnalysis);
 }
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index d553c72589f59..a6fb5ab0ee9e1 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -993,7 +993,8 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
 void SystemZInstrInfo::storeRegToStackSlot(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
     bool isKill, int FrameIdx, const TargetRegisterClass *RC,
-    const TargetRegisterInfo *TRI, Register VReg) const {
+    const TargetRegisterInfo *TRI, Register VReg,
+    MachineInstr::MIFlag Flags) const {
   DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
 
   // Callers may expect a single instruction, so keep 128-bit moves
@@ -1005,12 +1006,10 @@ void SystemZInstrInfo::storeRegToStackSlot(
                     FrameIdx);
 }
 
-void SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
-                                            MachineBasicBlock::iterator MBBI,
-                                            Register DestReg, int FrameIdx,
-                                            const TargetRegisterClass *RC,
-                                            const TargetRegisterInfo *TRI,
-                                            Register VReg) const {
+void SystemZInstrInfo::loadRegFromStackSlot(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
+    int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+    Register VReg, MachineInstr::MIFlag Flags) const {
   DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
 
   // Callers may expect a single instruction, so keep 128-bit moves
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index cc8a4ccd234cd..3d709a24db36c 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -278,17 +278,16 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
                    const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
                    bool KillSrc, bool RenamableDest = false,
                    bool RenamableSrc = false) const override;
-  void storeRegToStackSlot(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator MBBI, Register SrcReg,
-                           bool isKill, int FrameIndex,
-                           const TargetRegisterClass *RC,
-                           const TargetRegisterInfo *TRI,
-                           Register VReg) const override;
-  void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MBBI, Register DestReg,
-                            int FrameIdx, const TargetRegisterClass *RC,
-                            const TargetRegisterInfo *TRI,
-                            Register VReg) const override;
+  void storeRegToStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
+      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
+  void loadRegFromStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+      Register DestReg, int FrameIdx, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
   MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
                                       LiveIntervals *LIS) const override;
 
diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp
index 9295ae51b5d2e..3ace8e7eae927 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.cpp
+++ b/llvm/lib/Target/VE/VEInstrInfo.cpp
@@ -461,7 +461,8 @@ void VEInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                       Register SrcReg, bool isKill, int FI,
                                       const TargetRegisterClass *RC,
                                       const TargetRegisterInfo *TRI,
-                                      Register VReg) const {
+                                      Register VReg,
+                                      MachineInstr::MIFlag Flags) const {
   DebugLoc DL;
   if (I != MBB.end())
     DL = I->getDebugLoc();
@@ -519,12 +520,10 @@ void VEInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
     report_fatal_error("Can't store this register to stack slot");
 }
 
-void VEInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
-                                       MachineBasicBlock::iterator I,
-                                       Register DestReg, int FI,
-                                       const TargetRegisterClass *RC,
-                                       const TargetRegisterInfo *TRI,
-                                       Register VReg) const {
+void VEInstrInfo::loadRegFromStackSlot(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg,
+    int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+    Register VReg, MachineInstr::MIFlag Flags) const {
   DebugLoc DL;
   if (I != MBB.end())
     DL = I->getDebugLoc();
diff --git a/llvm/lib/Target/VE/VEInstrInfo.h b/llvm/lib/Target/VE/VEInstrInfo.h
index 3a9718f2f2603..210ce1a8a2662 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.h
+++ b/llvm/lib/Target/VE/VEInstrInfo.h
@@ -89,18 +89,17 @@ class VEInstrInfo : public VEGenInstrInfo {
                                int &FrameIndex) const override;
   Register isStoreToStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
-  void storeRegToStackSlot(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator MBBI, Register SrcReg,
-                           bool isKill, int FrameIndex,
-                           const TargetRegisterClass *RC,
-                           const TargetRegisterInfo *TRI,
-                           Register VReg) const override;
-
-  void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MBBI, Register DestReg,
-                            int FrameIndex, const TargetRegisterClass *RC,
-                            const TargetRegisterInfo *TRI,
-                            Register VReg) const override;
+  void storeRegToStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
+      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
+
+  void loadRegFromStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+      Register DestReg, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
   /// } Stack Spill & Reload
 
   /// Optimization {
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 4d40c23eb5617..18de38b2d0159 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -174,7 +174,7 @@ static unsigned getPOP2Opcode(const X86Subtarget &ST) {
 
 static bool isEAXLiveIn(MachineBasicBlock &MBB) {
   for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) {
-    unsigned Reg = RegMask.PhysReg;
+    MCRegister Reg = RegMask.PhysReg;
 
     if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
         Reg == X86::AH || Reg == X86::AL)
@@ -797,18 +797,40 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
                               : Is64Bit         ? X86::R11D
                                                 : X86::EAX;
 
-  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
-      .addReg(StackPtr)
-      .setMIFlag(MachineInstr::FrameSetup);
-
   // save loop bound
   {
-    const unsigned BoundOffset = alignDown(Offset, StackProbeSize);
-    const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
-    BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
-        .addReg(FinalStackProbed)
-        .addImm(BoundOffset)
-        .setMIFlag(MachineInstr::FrameSetup);
+    const uint64_t BoundOffset = alignDown(Offset, StackProbeSize);
+
+    // Can we calculate the loop bound using SUB with a 32-bit immediate?
+    // Note that the immediate gets sign-extended when used with a 64-bit
+    // register, so in that case we only have 31 bits to work with.
+    bool canUseSub =
+        Uses64BitFramePtr ? isUInt<31>(BoundOffset) : isUInt<32>(BoundOffset);
+
+    if (canUseSub) {
+      const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
+
+      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
+          .addReg(StackPtr)
+          .setMIFlag(MachineInstr::FrameSetup);
+      BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
+          .addReg(FinalStackProbed)
+          .addImm(BoundOffset)
+          .setMIFlag(MachineInstr::FrameSetup);
+    } else if (Uses64BitFramePtr) {
+      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), FinalStackProbed)
+          .addImm(-BoundOffset)
+          .setMIFlag(MachineInstr::FrameSetup);
+      BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), FinalStackProbed)
+          .addReg(FinalStackProbed)
+          .addReg(StackPtr)
+          .setMIFlag(MachineInstr::FrameSetup);
+    } else {
+      // We're being asked to probe a stack frame that's 4 GiB or larger,
+      // but our stack pointer is only 32 bits.  This might be unreachable
+      // code, so don't complain now; just trap if it's reached at runtime.
+      BuildMI(MBB, MBBI, DL, TII.get(X86::TRAP));
+    }
 
     // while in the loop, use loop-invariant reg for CFI,
     // instead of the stack pointer, which changes during the loop
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 87f3f7984989e..a956074e50d86 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -29215,7 +29215,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
            "Should not custom lower when pmulld is available!");
 
     // Extract the odd parts.
-    static const int UnpackMask[] = { 1, -1, 3, -1 };
+    static const int UnpackMask[] = {1, 1, 3, 3};
     SDValue Aodds = DAG.getVectorShuffle(VT, dl, A, A, UnpackMask);
     SDValue Bodds = DAG.getVectorShuffle(VT, dl, B, B, UnpackMask);
 
@@ -31253,7 +31253,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
   // to v2i64 results at a time. The upper 32-bits contain the wrapped bits
   // that can then be OR'd with the lower 32-bits.
   assert(VT == MVT::v4i32 && "Only v4i32 vector rotate expected");
-  static const int OddMask[] = {1, -1, 3, -1};
+  static const int OddMask[] = {1, 1, 3, 3};
   SDValue R13 = DAG.getVectorShuffle(VT, DL, R, R, OddMask);
   SDValue Scale13 = DAG.getVectorShuffle(VT, DL, Scale, Scale, OddMask);
 
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index edbcb17297603..557169b4aa67d 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -447,8 +447,8 @@ multiclass avx10_minmax_scalar<string OpStr, X86VectorVTInfo _, SDNode OpNode,
 
 
 let mayRaiseFPException = 0 in
-defm VMINMAXNEPBF16 : avx10_minmax_packed<"vminmaxnepbf16", avx512vl_bf16_info, X86vminmax>,
-                      AVX512XDIi8Base, EVEX_CD8<16, CD8VF>, TA;
+defm VMINMAXBF16 : avx10_minmax_packed<"vminmaxbf16", avx512vl_bf16_info, X86vminmax>,
+                   AVX512XDIi8Base, EVEX_CD8<16, CD8VF>, TA;
 
 defm VMINMAXPD : avx10_minmax_packed<"vminmaxpd", avx512vl_f64_info, X86vminmax>,
                  avx10_minmax_packed_sae<"vminmaxpd", avx512vl_f64_info, X86vminmaxSae>,
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 30a5161bbcc50..1baac05827c47 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4784,7 +4784,8 @@ void X86InstrInfo::loadStoreTileReg(MachineBasicBlock &MBB,
 void X86InstrInfo::storeRegToStackSlot(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
     bool isKill, int FrameIdx, const TargetRegisterClass *RC,
-    const TargetRegisterInfo *TRI, Register VReg) const {
+    const TargetRegisterInfo *TRI, Register VReg,
+    MachineInstr::MIFlag Flags) const {
   const MachineFunction &MF = *MBB.getParent();
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   assert(MFI.getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) &&
@@ -4803,12 +4804,10 @@ void X86InstrInfo::storeRegToStackSlot(
         .addReg(SrcReg, getKillRegState(isKill));
 }
 
-void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
-                                        MachineBasicBlock::iterator MI,
-                                        Register DestReg, int FrameIdx,
-                                        const TargetRegisterClass *RC,
-                                        const TargetRegisterInfo *TRI,
-                                        Register VReg) const {
+void X86InstrInfo::loadRegFromStackSlot(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
+    int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+    Register VReg, MachineInstr::MIFlag Flags) const {
   const MachineFunction &MF = *MBB.getParent();
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   assert(MFI.getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) &&
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index b006bc3971984..5f87e02fe67c4 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -420,18 +420,17 @@ class X86InstrInfo final : public X86GenInstrInfo {
                    const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
                    bool KillSrc, bool RenamableDest = false,
                    bool RenamableSrc = false) const override;
-  void storeRegToStackSlot(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator MI, Register SrcReg,
-                           bool isKill, int FrameIndex,
-                           const TargetRegisterClass *RC,
-                           const TargetRegisterInfo *TRI,
-                           Register VReg) const override;
-
-  void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MI, Register DestReg,
-                            int FrameIndex, const TargetRegisterClass *RC,
-                            const TargetRegisterInfo *TRI,
-                            Register VReg) const override;
+  void storeRegToStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
+      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
+
+  void loadRegFromStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
+      int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
   void loadStoreTileReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                         unsigned Opc, Register Reg, int FrameIdx,
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 86fd04046d16a..863cb668431ce 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -848,12 +848,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
                        X86ISD::FMAX_SAE),
     X86_INTRINSIC_DATA(avx10_vmaxps256, INTR_TYPE_2OP_SAE, X86ISD::FMAX,
                        X86ISD::FMAX_SAE),
-    X86_INTRINSIC_DATA(avx10_vminmaxnepbf16128, INTR_TYPE_3OP, X86ISD::VMINMAX,
-                       0),
-    X86_INTRINSIC_DATA(avx10_vminmaxnepbf16256, INTR_TYPE_3OP, X86ISD::VMINMAX,
-                       0),
-    X86_INTRINSIC_DATA(avx10_vminmaxnepbf16512, INTR_TYPE_3OP, X86ISD::VMINMAX,
-                       0),
+    X86_INTRINSIC_DATA(avx10_vminmaxbf16128, INTR_TYPE_3OP, X86ISD::VMINMAX, 0),
+    X86_INTRINSIC_DATA(avx10_vminmaxbf16256, INTR_TYPE_3OP, X86ISD::VMINMAX, 0),
+    X86_INTRINSIC_DATA(avx10_vminmaxbf16512, INTR_TYPE_3OP, X86ISD::VMINMAX, 0),
     X86_INTRINSIC_DATA(avx10_vminpd256, INTR_TYPE_2OP_SAE, X86ISD::FMIN,
                        X86ISD::FMIN_SAE),
     X86_INTRINSIC_DATA(avx10_vminph256, INTR_TYPE_2OP_SAE, X86ISD::FMIN,
diff --git a/llvm/lib/Target/XCore/XCoreInstrInfo.cpp b/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
index 5371f1f8db48d..a15681afa28d4 100644
--- a/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -355,7 +355,8 @@ void XCoreInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
 void XCoreInstrInfo::storeRegToStackSlot(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg,
     bool isKill, int FrameIndex, const TargetRegisterClass *RC,
-    const TargetRegisterInfo *TRI, Register VReg) const {
+    const TargetRegisterInfo *TRI, Register VReg,
+    MachineInstr::MIFlag Flags) const {
   DebugLoc DL;
   if (I != MBB.end() && !I->isDebugInstr())
     DL = I->getDebugLoc();
@@ -377,7 +378,8 @@ void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                           Register DestReg, int FrameIndex,
                                           const TargetRegisterClass *RC,
                                           const TargetRegisterInfo *TRI,
-                                          Register VReg) const {
+                                          Register VReg,
+                                          MachineInstr::MIFlag Flags) const {
   DebugLoc DL;
   if (I != MBB.end() && !I->isDebugInstr())
     DL = I->getDebugLoc();
diff --git a/llvm/lib/Target/XCore/XCoreInstrInfo.h b/llvm/lib/Target/XCore/XCoreInstrInfo.h
index 7f330539dd76a..036321b573e35 100644
--- a/llvm/lib/Target/XCore/XCoreInstrInfo.h
+++ b/llvm/lib/Target/XCore/XCoreInstrInfo.h
@@ -67,18 +67,17 @@ class XCoreInstrInfo : public XCoreGenInstrInfo {
                    bool KillSrc, bool RenamableDest = false,
                    bool RenamableSrc = false) const override;
 
-  void storeRegToStackSlot(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator MI, Register SrcReg,
-                           bool isKill, int FrameIndex,
-                           const TargetRegisterClass *RC,
-                           const TargetRegisterInfo *TRI,
-                           Register VReg) const override;
-
-  void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MI, Register DestReg,
-                            int FrameIndex, const TargetRegisterClass *RC,
-                            const TargetRegisterInfo *TRI,
-                            Register VReg) const override;
+  void storeRegToStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
+      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
+
+  void loadRegFromStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
+      int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
   bool reverseBranchCondition(
                           SmallVectorImpl<MachineOperand> &Cond) const override;
diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp b/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp
index 7e00215ef3b97..c38c78b54ec21 100644
--- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp
@@ -123,7 +123,8 @@ void XtensaInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
 void XtensaInstrInfo::storeRegToStackSlot(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
     bool isKill, int FrameIdx, const TargetRegisterClass *RC,
-    const TargetRegisterInfo *TRI, Register VReg) const {
+    const TargetRegisterInfo *TRI, Register VReg,
+    MachineInstr::MIFlag Flags) const {
   DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
   unsigned LoadOpcode, StoreOpcode;
   getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode, FrameIdx);
@@ -132,12 +133,10 @@ void XtensaInstrInfo::storeRegToStackSlot(
   addFrameReference(MIB, FrameIdx);
 }
 
-void XtensaInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
-                                           MachineBasicBlock::iterator MBBI,
-                                           Register DestReg, int FrameIdx,
-                                           const TargetRegisterClass *RC,
-                                           const TargetRegisterInfo *TRI,
-                                           Register VReg) const {
+void XtensaInstrInfo::loadRegFromStackSlot(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
+    int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+    Register VReg, MachineInstr::MIFlag Flags) const {
   DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
   unsigned LoadOpcode, StoreOpcode;
   getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode, FrameIdx);
diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.h b/llvm/lib/Target/Xtensa/XtensaInstrInfo.h
index 31da4d481d309..5d1206b918089 100644
--- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.h
+++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.h
@@ -54,18 +54,17 @@ class XtensaInstrInfo : public XtensaGenInstrInfo {
                    bool KillSrc, bool RenamableDest = false,
                    bool RenamableSrc = false) const override;
 
-  void storeRegToStackSlot(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator MBBI, Register SrcReg,
-                           bool isKill, int FrameIndex,
-                           const TargetRegisterClass *RC,
-                           const TargetRegisterInfo *TRI,
-                           Register VReg) const override;
-
-  void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MBBI, Register DestReg,
-                            int FrameIdx, const TargetRegisterClass *RC,
-                            const TargetRegisterInfo *TRI,
-                            Register VReg) const override;
+  void storeRegToStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
+      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
+
+  void loadRegFromStackSlot(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+      Register DestReg, int FrameIdx, const TargetRegisterClass *RC,
+      const TargetRegisterInfo *TRI, Register VReg,
+      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
   // Get the load and store opcodes for a given register class and offset.
   void getLoadStoreOpcodes(const TargetRegisterClass *RC, unsigned &LoadOpcode,
diff --git a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
index 6ce06b434b2c0..15d959d7712dd 100644
--- a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
+++ b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
@@ -503,23 +503,22 @@ int llvm::libDriverMain(ArrayRef<const char *> ArgsArr) {
       return 1;
     }
   }
-  // llvm-lib uses relative paths for both regular and thin archives, unlike
-  // standard GNU ar, which only uses relative paths for thin archives and
-  // basenames for regular archives.
-  for (NewArchiveMember &Member : Members) {
-    if (sys::path::is_relative(Member.MemberName)) {
-      Expected<std::string> PathOrErr =
-          computeArchiveRelativePath(OutputPath, Member.MemberName);
-      if (PathOrErr)
-        Member.MemberName = Saver.save(*PathOrErr);
+
+  bool Thin = Args.hasArg(OPT_llvmlibthin);
+  if (Thin) {
+    for (NewArchiveMember &Member : Members) {
+      if (sys::path::is_relative(Member.MemberName)) {
+        Expected<std::string> PathOrErr =
+            computeArchiveRelativePath(OutputPath, Member.MemberName);
+        if (PathOrErr)
+          Member.MemberName = Saver.save(*PathOrErr);
+      }
     }
   }
 
   // For compatibility with MSVC, reverse member vector after de-duplication.
   std::reverse(Members.begin(), Members.end());
 
-  bool Thin = Args.hasArg(OPT_llvmlibthin);
-
   auto Symtab = Args.hasFlag(OPT_llvmlibindex, OPT_llvmlibindex_no,
                              /*default=*/true)
                     ? SymtabWritingMode::NormalSymtab
diff --git a/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp b/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp
index 895c8c9d48681..40164a34f08ac 100644
--- a/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp
+++ b/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp
@@ -299,6 +299,13 @@ HipStdParAllocationInterpositionPass::run(Module &M, ModuleAnalysisManager&) {
     }
   }
 
+  if (auto F = M.getFunction("__hipstdpar_hidden_malloc")) {
+    auto LibcMalloc = M.getOrInsertFunction(
+        "__libc_malloc", F->getFunctionType(), F->getAttributes());
+    F->replaceAllUsesWith(LibcMalloc.getCallee());
+
+    eraseFromModule(*F);
+  }
   if (auto F = M.getFunction("__hipstdpar_hidden_free")) {
     auto LibcFree = M.getOrInsertFunction("__libc_free", F->getFunctionType(),
                                           F->getAttributes());
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index eb97d8b4a74f3..00c20ad5f3709 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -95,6 +95,21 @@ STATISTIC(NumIFuncsDeleted, "Number of IFuncs removed");
 STATISTIC(NumGlobalArraysPadded,
           "Number of global arrays padded to alignment boundary");
 
+// FIXME:
+// Optimizing non-FMV callers is causing a regression in the llvm test suite,
+// specifically a 'predres' version is unexpectedly trapping on GravitonG4.
+// My explanation is that when the caller is not a versioned function, the
+// compiler exclusively relies on the command line option, or target attribute
+// to deduce whether a feature is available. However, there is no guarantee
+// that in reality the host supports those implied features, which arguably
+// is a user error. This option allows disabling the optimization as a short
+// term workaround to keep the bots green.
+static cl::opt<bool>
+    OptimizeNonFMVCallers("optimize-non-fmv-callers",
+                          cl::desc("Statically resolve calls to versioned "
+                                   "functions from non-versioned callers."),
+                          cl::init(false), cl::Hidden);
+
 static cl::opt<bool>
     EnableColdCCStressTest("enable-coldcc-stress-test",
                            cl::desc("Enable stress test of coldcc by adding "
@@ -2715,6 +2730,9 @@ static bool OptimizeNonTrivialIFuncs(
 
     assert(!Callees.empty() && "Expecting successful collection of versions");
 
+    LLVM_DEBUG(dbgs() << "Statically resolving calls to function "
+                      << Resolver->getName() << "\n");
+
     // Cache the feature mask for each callee.
     for (Function *Callee : Callees) {
       auto [It, Inserted] = FeatureMask.try_emplace(Callee);
@@ -2785,20 +2803,15 @@ static bool OptimizeNonTrivialIFuncs(
       } else {
         // We can't reason much about non-FMV callers. Just pick the highest
         // priority callee if it matches, otherwise bail.
-        // if (I > 0 || !implies(CallerBits, CalleeBits))
-        //
-        // FIXME: This is causing a regression in the llvm test suite,
-        // specifically a 'predres' version is unexpectedly trapping on
-        // GravitonG4. My explanation is that when the caller in not a
-        // versioned function, the compiler exclusively relies on the
-        // command line option, or target attribute to deduce whether a
-        // feature is available. However, there is no guarantee that in
-        // reality the host supports those implied features.
-        continue;
+        if (!OptimizeNonFMVCallers || I > 0 || !implies(CallerBits, CalleeBits))
+          continue;
       }
       auto &Calls = CallSites[Caller];
-      for (CallBase *CS : Calls)
+      for (CallBase *CS : Calls) {
+        LLVM_DEBUG(dbgs() << "Redirecting call " << Caller->getName() << " -> "
+                          << Callee->getName() << "\n");
         CS->setCalledOperand(Callee);
+      }
       Changed = true;
     }
     if (IF.use_empty() ||
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index 988e912b2de83..f027c952f8cdc 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -3110,7 +3110,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
   } else {
     // Only moving a subset of Edge's ids.
     if (CallerEdgeI)
-      ++CallerEdgeI;
+      ++(*CallerEdgeI);
     // Compute the alloc type of the subset of ids being moved.
     auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
     if (ExistingEdgeToNewCallee) {
@@ -3542,7 +3542,7 @@ void ModuleCallsiteContextGraph::updateAllocationCall(
 
 void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &Call,
                                                      AllocationType AllocType) {
-  auto *AI = Call.call().dyn_cast<AllocInfo *>();
+  auto *AI = cast<AllocInfo *>(Call.call());
   assert(AI);
   assert(AI->Versions.size() > Call.cloneNo());
   AI->Versions[Call.cloneNo()] = (uint8_t)AllocType;
@@ -3560,7 +3560,7 @@ ModuleCallsiteContextGraph::getAllocationCallType(const CallInfo &Call) const {
 
 AllocationType
 IndexCallsiteContextGraph::getAllocationCallType(const CallInfo &Call) const {
-  const auto *AI = Call.call().dyn_cast<AllocInfo *>();
+  const auto *AI = cast<AllocInfo *>(Call.call());
   assert(AI->Versions.size() > Call.cloneNo());
   return (AllocationType)AI->Versions[Call.cloneNo()];
 }
@@ -3579,7 +3579,7 @@ void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
 
 void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
                                            FuncInfo CalleeFunc) {
-  auto *CI = CallerCall.call().dyn_cast<CallsiteInfo *>();
+  auto *CI = cast<CallsiteInfo *>(CallerCall.call());
   assert(CI &&
          "Caller cannot be an allocation which should not have profiled calls");
   assert(CI->Clones.size() > CallerCall.cloneNo());
@@ -3630,13 +3630,13 @@ IndexCallsiteContextGraph::cloneFunctionForCallsite(
   for (auto &Inst : CallsWithMetadataInFunc) {
     // This map always has the initial version in it.
     assert(Inst.cloneNo() == 0);
-    if (auto *AI = Inst.call().dyn_cast<AllocInfo *>()) {
+    if (auto *AI = dyn_cast<AllocInfo *>(Inst.call())) {
       assert(AI->Versions.size() == CloneNo);
       // We assign the allocation type later (in updateAllocationCall), just add
       // an entry for it here.
       AI->Versions.push_back(0);
     } else {
-      auto *CI = Inst.call().dyn_cast<CallsiteInfo *>();
+      auto *CI = cast<CallsiteInfo *>(Inst.call());
       assert(CI && CI->Clones.size() == CloneNo);
       // We assign the clone number later (in updateCall), just add an entry for
       // it here.
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 300a564e222e1..7b221a814aabd 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -330,11 +330,6 @@ bool JumpThreadingPass::runImpl(Function &F_, FunctionAnalysisManager *FAM_,
       while (processBlock(&BB)) // Thread all of the branches we can over BB.
         Changed = ChangedSinceLastAnalysisUpdate = true;
 
-      // Jump threading may have introduced redundant debug values into BB
-      // which should be removed.
-      if (Changed)
-        RemoveRedundantDbgInstrs(&BB);
-
       // Stop processing BB if it's the entry or is now deleted. The following
       // routines attempt to eliminate BB and locating a suitable replacement
       // for the entry is non-trivial.
@@ -366,7 +361,6 @@ bool JumpThreadingPass::runImpl(Function &F_, FunctionAnalysisManager *FAM_,
             // detect and transform nested loops later.
             !LoopHeaders.count(&BB) && !LoopHeaders.count(Succ) &&
             TryToSimplifyUncondBranchFromEmptyBlock(&BB, DTU.get())) {
-          RemoveRedundantDbgInstrs(Succ);
           // BB is valid for cleanup here because we passed in DTU. F remains
           // BB's parent until a DTU->getDomTree() event.
           LVI->eraseBlock(&BB);
@@ -377,6 +371,13 @@ bool JumpThreadingPass::runImpl(Function &F_, FunctionAnalysisManager *FAM_,
     EverChanged |= Changed;
   } while (Changed);
 
+  // Jump threading may have introduced redundant debug values into F which
+  // should be removed.
+  if (EverChanged)
+    for (auto &BB : *F) {
+      RemoveRedundantDbgInstrs(&BB);
+    }
+
   LoopHeaders.clear();
   return EverChanged;
 }
diff --git a/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 3a699df1cde4d..2b50ccdc2eeb4 100644
--- a/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -14,6 +14,7 @@
 
 #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h"
 #include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/Dominators.h"
@@ -33,7 +34,8 @@ DEBUG_COUNTER(PILCounter, "partially-inline-libcalls-transform",
 
 static bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
                          BasicBlock &CurrBB, Function::iterator &BB,
-                         const TargetTransformInfo *TTI, DomTreeUpdater *DTU) {
+                         const TargetTransformInfo *TTI, DomTreeUpdater *DTU,
+                         OptimizationRemarkEmitter *ORE) {
   // There is no need to change the IR, since backend will emit sqrt
   // instruction if the call has already been marked read-only.
   if (Call->onlyReadsMemory())
@@ -103,7 +105,8 @@ static bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
 
 static bool runPartiallyInlineLibCalls(Function &F, TargetLibraryInfo *TLI,
                                        const TargetTransformInfo *TTI,
-                                       DominatorTree *DT) {
+                                       DominatorTree *DT,
+                                       OptimizationRemarkEmitter *ORE) {
   std::optional<DomTreeUpdater> DTU;
   if (DT)
     DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
@@ -140,7 +143,7 @@ static bool runPartiallyInlineLibCalls(Function &F, TargetLibraryInfo *TLI,
       case LibFunc_sqrt:
         if (TTI->haveFastSqrt(Call->getType()) &&
             optimizeSQRT(Call, CalledFunc, *CurrBB, BB, TTI,
-                         DTU ? &*DTU : nullptr))
+                         DTU ? &*DTU : nullptr, ORE))
           break;
         continue;
       default:
@@ -160,7 +163,8 @@ PartiallyInlineLibCallsPass::run(Function &F, FunctionAnalysisManager &AM) {
   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
   auto &TTI = AM.getResult<TargetIRAnalysis>(F);
   auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
-  if (!runPartiallyInlineLibCalls(F, &TLI, &TTI, DT))
+  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+  if (!runPartiallyInlineLibCalls(F, &TLI, &TTI, DT, &ORE))
     return PreservedAnalyses::all();
   PreservedAnalyses PA;
   PA.preserve<DominatorTreeAnalysis>();
@@ -181,6 +185,7 @@ class PartiallyInlineLibCallsLegacyPass : public FunctionPass {
     AU.addRequired<TargetLibraryInfoWrapperPass>();
     AU.addRequired<TargetTransformInfoWrapperPass>();
     AU.addPreserved<DominatorTreeWrapperPass>();
+    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
     FunctionPass::getAnalysisUsage(AU);
   }
 
@@ -195,7 +200,8 @@ class PartiallyInlineLibCallsLegacyPass : public FunctionPass {
     DominatorTree *DT = nullptr;
     if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
       DT = &DTWP->getDomTree();
-    return runPartiallyInlineLibCalls(F, TLI, TTI, DT);
+    auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+    return runPartiallyInlineLibCalls(F, TLI, TTI, DT, ORE);
   }
 };
 }
@@ -208,6 +214,7 @@ INITIALIZE_PASS_BEGIN(PartiallyInlineLibCallsLegacyPass,
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
 INITIALIZE_PASS_END(PartiallyInlineLibCallsLegacyPass,
                     "partially-inline-libcalls",
                     "Partially inline calls to library functions", false, false)
diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
index bc50f23d8eb27..9361ea063c1d6 100644
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -2174,13 +2174,14 @@ void ReassociatePass::OptimizeInst(Instruction *I) {
   if (isa<FPMathOperator>(I) && !hasFPAssociativeFlags(I))
     return;
 
-  // Do not reassociate boolean (i1) expressions.  We want to preserve the
+  // Do not reassociate boolean (i1/vXi1) expressions.  We want to preserve the
   // original order of evaluation for short-circuited comparisons that
   // SimplifyCFG has folded to AND/OR expressions.  If the expression
   // is not further optimized, it is likely to be transformed back to a
   // short-circuited form for code gen, and the source order may have been
-  // optimized for the most likely conditions.
-  if (I->getType()->isIntegerTy(1))
+  // optimized for the most likely conditions. For vector boolean expressions,
+  // we should be optimizing for ILP and not serializing the logical operations.
+  if (I->getType()->isIntOrIntVectorTy(1))
     return;
 
   // If this is a bitwise or instruction of operands
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 406864a6793dc..e3599315e224f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1631,12 +1631,11 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
 
   // Keep a record of all the exiting blocks.
   SmallVector<const SCEVPredicate *, 4> Predicates;
+  std::optional<std::pair<BasicBlock *, BasicBlock *>> SingleUncountableEdge;
   for (BasicBlock *BB : ExitingBlocks) {
     const SCEV *EC =
         PSE.getSE()->getPredicatedExitCount(TheLoop, BB, &Predicates);
     if (isa<SCEVCouldNotCompute>(EC)) {
-      UncountableExitingBlocks.push_back(BB);
-
       SmallVector<BasicBlock *, 2> Succs(successors(BB));
       if (Succs.size() != 2) {
         reportVectorizationFailure(
@@ -1653,7 +1652,16 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
         assert(!TheLoop->contains(Succs[1]));
         ExitBlock = Succs[1];
       }
-      UncountableExitBlocks.push_back(ExitBlock);
+
+      if (SingleUncountableEdge) {
+        reportVectorizationFailure(
+            "Loop has too many uncountable exits",
+            "Cannot vectorize early exit loop with more than one early exit",
+            "TooManyUncountableEarlyExits", ORE, TheLoop);
+        return false;
+      }
+
+      SingleUncountableEdge = {BB, ExitBlock};
     } else
       CountableExitingBlocks.push_back(BB);
   }
@@ -1663,19 +1671,15 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
   // PSE.getSymbolicMaxBackedgeTakenCount() below.
   Predicates.clear();
 
-  // We only support one uncountable early exit.
-  if (getUncountableExitingBlocks().size() != 1) {
-    reportVectorizationFailure(
-        "Loop has too many uncountable exits",
-        "Cannot vectorize early exit loop with more than one early exit",
-        "TooManyUncountableEarlyExits", ORE, TheLoop);
+  if (!SingleUncountableEdge) {
+    LLVM_DEBUG(dbgs() << "LV: Could not find any uncountable exits");
     return false;
   }
 
   // The only supported early exit loops so far are ones where the early
   // exiting block is a unique predecessor of the latch block.
   BasicBlock *LatchPredBB = LatchBB->getUniquePredecessor();
-  if (LatchPredBB != getUncountableEarlyExitingBlock()) {
+  if (LatchPredBB != SingleUncountableEdge->first) {
     reportVectorizationFailure("Early exit is not the latch predecessor",
                                "Cannot vectorize early exit loop",
                                "EarlyExitNotLatchPredecessor", ORE, TheLoop);
@@ -1728,7 +1732,7 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
     }
 
   // The vectoriser cannot handle loads that occur after the early exit block.
-  assert(LatchBB->getUniquePredecessor() == getUncountableEarlyExitingBlock() &&
+  assert(LatchBB->getUniquePredecessor() == SingleUncountableEdge->first &&
          "Expected latch predecessor to be the early exiting block");
 
   // TODO: Handle loops that may fault.
@@ -1751,6 +1755,7 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
   LLVM_DEBUG(dbgs() << "LV: Found an early exit loop with symbolic max "
                        "backedge taken count: "
                     << *SymbolicMaxBTC << '\n');
+  UncountableEdge = SingleUncountableEdge;
   return true;
 }
 
@@ -1812,7 +1817,6 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
       return false;
   }
 
-  HasUncountableEarlyExit = false;
   if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
     if (TheLoop->getExitingBlock()) {
       reportVectorizationFailure("Cannot vectorize uncountable loop",
@@ -1822,10 +1826,8 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
       else
         return false;
     } else {
-      HasUncountableEarlyExit = true;
       if (!isVectorizableEarlyExitLoop()) {
-        UncountableExitingBlocks.clear();
-        HasUncountableEarlyExit = false;
+        UncountableEdge = std::nullopt;
         if (DoExtraAnalysis)
           Result = false;
         else
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 29f3940ed6fa7..7167e2179af53 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3388,10 +3388,10 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
   if (hasIrregularType(ScalarTy, DL))
     return false;
 
-  // For scalable vectors, the only interleave factor currently supported
-  // must be power of 2 since we require the (de)interleave2 intrinsics
-  // instead of shufflevectors.
-  if (VF.isScalable() && !isPowerOf2_32(InterleaveFactor))
+  // We currently only know how to emit interleave/deinterleave with
+  // Factor=2 for scalable vectors. This is purely an implementation
+  // limit.
+  if (VF.isScalable() && InterleaveFactor != 2)
     return false;
 
   // If the group involves a non-integral pointer, we may not be able to
@@ -9027,10 +9027,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
 }
 
 // Collect VPIRInstructions for phis in the exit blocks that are modeled
-// in VPlan and add the exiting VPValue as operand. Some exiting values are not
-// modeled explicitly yet and won't be included. Those are un-truncated
-// VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe and induction
-// increments.
+// in VPlan and add the exiting VPValue as operand.
 static SetVector<VPIRInstruction *>
 collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder,
                          VPlan &Plan) {
@@ -9265,9 +9262,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
                      CM.getWideningDecision(IG->getInsertPos(), VF) ==
                          LoopVectorizationCostModel::CM_Interleave);
       // For scalable vectors, the only interleave factor currently supported
-      // must be power of 2 since we require the (de)interleave2 intrinsics
-      // instead of shufflevectors.
-      assert((!Result || !VF.isScalable() || isPowerOf2_32(IG->getFactor())) &&
+      // is 2 since we require the (de)interleave2 intrinsics instead of
+      // shufflevectors.
+      assert((!Result || !VF.isScalable() || IG->getFactor() == 2) &&
              "Unsupported interleave factor for scalable vectors");
       return Result;
     };
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index ad4855d908747..961cab33c579f 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2656,7 +2656,9 @@ class BoUpSLP {
         }
         // TODO: Check if we can remove a check for non-power-2 number of
         // scalars after full support of non-power-2 vectorization.
-        return UniqueValues.size() != 2 && has_single_bit(UniqueValues.size());
+        return UniqueValues.size() != 2 &&
+               hasFullVectorsOrPowerOf2(*R.TTI, Op0.front().V->getType(),
+                                        UniqueValues.size());
       };
 
       // If the initial strategy fails for any of the operand indexes, then we
@@ -5101,12 +5103,13 @@ BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
                  });
         });
     const unsigned AbsoluteDiff = std::abs(*Diff);
-    if (IsPossibleStrided && (IsAnyPointerUsedOutGraph ||
-                              ((Sz > MinProfitableStridedLoads ||
-                                (AbsoluteDiff <= MaxProfitableLoadStride * Sz &&
-                                 has_single_bit(AbsoluteDiff))) &&
-                               AbsoluteDiff > Sz) ||
-                              *Diff == -(static_cast<int>(Sz) - 1))) {
+    if (IsPossibleStrided &&
+        (IsAnyPointerUsedOutGraph ||
+         (AbsoluteDiff > Sz &&
+          (Sz > MinProfitableStridedLoads ||
+           (AbsoluteDiff <= MaxProfitableLoadStride * Sz &&
+            AbsoluteDiff % Sz == 0 && has_single_bit(AbsoluteDiff / Sz)))) ||
+         *Diff == -(static_cast<int>(Sz) - 1))) {
       int Stride = *Diff / static_cast<int>(Sz - 1);
       if (*Diff == Stride * static_cast<int>(Sz - 1)) {
         Align Alignment =
@@ -5192,9 +5195,9 @@ BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
       return MaskedGatherCost - GatherCost >= -SLPCostThreshold;
 
     // FIXME: The following code has not been updated for non-power-of-2
-    // vectors.  The splitting logic here does not cover the original
-    // vector if the vector factor is not a power of two.  FIXME
-    if (!has_single_bit(VL.size()))
+    // vectors (and not whole registers).  The splitting logic here does not
+    // cover the original vector if the vector factor is not a power of two.
+    if (!hasFullVectorsOrPowerOf2(TTI, ScalarTy, VL.size()))
       return false;
 
     unsigned Sz = DL->getTypeSizeInBits(ScalarTy);
@@ -5202,7 +5205,10 @@ BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
     DemandedElts.clearAllBits();
     // Iterate through possible vectorization factors and check if vectorized +
     // shuffles is better than just gather.
-    for (unsigned VF = VL.size() / 2; VF >= MinVF; VF /= 2) {
+    for (unsigned VF =
+             getFloorFullVectorNumberOfElements(TTI, ScalarTy, VL.size() - 1);
+         VF >= MinVF;
+         VF = getFloorFullVectorNumberOfElements(TTI, ScalarTy, VF - 1)) {
       SmallVector<LoadsState> States;
       for (unsigned Cnt = 0, End = VL.size(); Cnt + VF <= End; Cnt += VF) {
         ArrayRef<Value *> Slice = VL.slice(Cnt, VF);
@@ -7632,8 +7638,9 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
   case Instruction::ExtractValue:
   case Instruction::ExtractElement: {
     bool Reuse = canReuseExtract(VL, CurrentOrder);
-    // FIXME: Vectorizing is not supported yet for non-power-of-2 ops.
-    if (!has_single_bit(VL.size()))
+    // FIXME: Vectorizing is not supported yet for non-power-of-2 ops (and
+    // non-full registers).
+    if (!hasFullVectorsOrPowerOf2(*TTI, VL0->getType(), VL.size()))
       return TreeEntry::NeedToGather;
     if (Reuse || !CurrentOrder.empty())
       return TreeEntry::Vectorize;
@@ -8089,7 +8096,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
       // FIXME: Reshuffing scalars is not supported yet for non-power-of-2 ops.
       if ((UserTreeIdx.UserTE &&
            UserTreeIdx.UserTE->hasNonWholeRegisterOrNonPowerOf2Vec(*TTI)) ||
-          !has_single_bit(VL.size())) {
+          !hasFullVectorsOrPowerOf2(*TTI, VL.front()->getType(), VL.size())) {
         LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported "
                              "for nodes with padding.\n");
         newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
@@ -9840,7 +9847,8 @@ void BoUpSLP::transformNodes() {
             if (!S || S.isAltShuffle() || !allSameBlock(Slice) ||
                 (S.getOpcode() == Instruction::Load &&
                  areKnownNonVectorizableLoads(Slice)) ||
-                (S.getOpcode() != Instruction::Load && !has_single_bit(VF)))
+                (S.getOpcode() != Instruction::Load &&
+                 !hasFullVectorsOrPowerOf2(*TTI, Slice.front()->getType(), VF)))
               continue;
             if (VF == 2) {
               // Try to vectorize reduced values or if all users are vectorized.
@@ -13618,8 +13626,9 @@ BoUpSLP::isGatherShuffledEntry(
                  return !TE->isGather();
                })))
     return {};
-  // FIXME: Gathering for non-power-of-2 nodes not implemented yet.
-  if (TE->isNonPowOf2Vec())
+  // FIXME: Gathering for non-power-of-2 (non whole registers) nodes not
+  // implemented yet.
+  if (TE->hasNonWholeRegisterOrNonPowerOf2Vec(*TTI))
     return {};
   Mask.assign(VL.size(), PoisonMaskElem);
   assert((TE->UserTreeIndices.size() == 1 ||
@@ -19200,9 +19209,11 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
     }
   }
 
+  Type *ScalarTy = getValueType(VL[0]);
   unsigned Sz = R.getVectorElementSize(I0);
   unsigned MinVF = R.getMinVF(Sz);
-  unsigned MaxVF = std::max<unsigned>(llvm::bit_floor(VL.size()), MinVF);
+  unsigned MaxVF = std::max<unsigned>(
+      getFloorFullVectorNumberOfElements(*TTI, ScalarTy, VL.size()), MinVF);
   MaxVF = std::min(R.getMaximumVF(Sz, S.getOpcode()), MaxVF);
   if (MaxVF < 2) {
     R.getORE()->emit([&]() {
@@ -19216,10 +19227,10 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
   bool Changed = false;
   bool CandidateFound = false;
   InstructionCost MinCost = SLPCostThreshold.getValue();
-  Type *ScalarTy = getValueType(VL[0]);
 
   unsigned NextInst = 0, MaxInst = VL.size();
-  for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF; VF /= 2) {
+  for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF;
+       VF = getFloorFullVectorNumberOfElements(*TTI, I0->getType(), VF - 1)) {
     // No actual vectorization should happen, if number of parts is the same as
     // provided vectorization factor (i.e. the scalar type is used for vector
     // code during codegen).
@@ -19234,7 +19245,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
 
       if (MaxVFOnly && ActualVF < MaxVF)
         break;
-      if ((VF > MinVF && ActualVF <= VF / 2) || (VF == MinVF && ActualVF < 2))
+      if ((VF > MinVF && ActualVF < VF) || (VF == MinVF && ActualVF < 2))
         break;
 
       SmallVector<Value *> Ops(ActualVF, nullptr);
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp
index c1a046a157d3b..d65a04c0df6ee 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp
@@ -368,10 +368,11 @@ void DependencyGraph::notifyCreateInstr(Instruction *I) {
 }
 
 void DependencyGraph::notifyMoveInstr(Instruction *I, const BBIterator &To) {
-  // Early return if `I` doesn't actually move.
+  // NOTE: This function runs before `I` moves to its new destination.
   BasicBlock *BB = To.getNodeParent();
-  if (To != BB->end() && &*To == I->getNextNode())
-    return;
+  assert(!(To != BB->end() && &*To == I->getNextNode()) &&
+         !(To == BB->end() && std::next(I->getIterator()) == BB->end()) &&
+         "Should not have been called if destination is same as origin.");
 
   // Maintain the DAGInterval.
   DAGInterval.notifyMoveInstr(I, To);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index aa5f92b235555..7b5d0d70933fd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2855,21 +2855,10 @@ static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
   // Scalable vectors cannot use arbitrary shufflevectors (only splats), so
   // must use intrinsics to interleave.
   if (VecTy->isScalableTy()) {
-    assert(isPowerOf2_32(Factor) && "Unsupported interleave factor for "
-                                    "scalable vectors, must be power of 2");
-    SmallVector<Value *> InterleavingValues(Vals);
-    // When interleaving, the number of values will be shrunk until we have the
-    // single final interleaved value.
-    auto *InterleaveTy = cast<VectorType>(InterleavingValues[0]->getType());
-    for (unsigned Midpoint = Factor / 2; Midpoint > 0; Midpoint /= 2) {
-      InterleaveTy = VectorType::getDoubleElementsVectorType(InterleaveTy);
-      for (unsigned I = 0; I < Midpoint; ++I)
-        InterleavingValues[I] = Builder.CreateIntrinsic(
-            InterleaveTy, Intrinsic::vector_interleave2,
-            {InterleavingValues[I], InterleavingValues[Midpoint + I]},
-            /*FMFSource=*/nullptr, Name);
-    }
-    return InterleavingValues[0];
+    VectorType *WideVecTy = VectorType::getDoubleElementsVectorType(VecTy);
+    return Builder.CreateIntrinsic(WideVecTy, Intrinsic::vector_interleave2,
+                                   Vals,
+                                   /*FMFSource=*/nullptr, Name);
   }
 
   // Fixed length. Start by concatenating all vectors into a wide vector.
@@ -2955,11 +2944,15 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
                           &InterleaveFactor](Value *MaskForGaps) -> Value * {
     if (State.VF.isScalable()) {
       assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
-      assert(isPowerOf2_32(InterleaveFactor) &&
+      assert(InterleaveFactor == 2 &&
              "Unsupported deinterleave factor for scalable vectors");
       auto *ResBlockInMask = State.get(BlockInMask);
-      SmallVector<Value *> Ops(InterleaveFactor, ResBlockInMask);
-      return interleaveVectors(State.Builder, Ops, "interleaved.mask");
+      SmallVector<Value *, 2> Ops = {ResBlockInMask, ResBlockInMask};
+      auto *MaskTy = VectorType::get(State.Builder.getInt1Ty(),
+                                     State.VF.getKnownMinValue() * 2, true);
+      return State.Builder.CreateIntrinsic(
+          MaskTy, Intrinsic::vector_interleave2, Ops,
+          /*FMFSource=*/nullptr, "interleaved.mask");
     }
 
     if (!BlockInMask)
@@ -2999,48 +2992,22 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
     ArrayRef<VPValue *> VPDefs = definedValues();
     const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
     if (VecTy->isScalableTy()) {
-      assert(isPowerOf2_32(InterleaveFactor) &&
+      assert(InterleaveFactor == 2 &&
              "Unsupported deinterleave factor for scalable vectors");
 
-      // Scalable vectors cannot use arbitrary shufflevectors (only splats),
-      // so must use intrinsics to deinterleave.
-      SmallVector<Value *> DeinterleavedValues(InterleaveFactor);
-      DeinterleavedValues[0] = NewLoad;
-      // For the case of InterleaveFactor > 2, we will have to do recursive
-      // deinterleaving, because the current available deinterleave intrinsic
-      // supports only Factor of 2, otherwise it will bailout after first
-      // iteration.
-      // When deinterleaving, the number of values will double until we
-      // have "InterleaveFactor".
-      for (unsigned NumVectors = 1; NumVectors < InterleaveFactor;
-           NumVectors *= 2) {
-        // Deinterleave the elements within the vector
-        SmallVector<Value *> TempDeinterleavedValues(NumVectors);
-        for (unsigned I = 0; I < NumVectors; ++I) {
-          auto *DiTy = DeinterleavedValues[I]->getType();
-          TempDeinterleavedValues[I] = State.Builder.CreateIntrinsic(
-              Intrinsic::vector_deinterleave2, DiTy, DeinterleavedValues[I],
-              /*FMFSource=*/nullptr, "strided.vec");
-        }
-        // Extract the deinterleaved values:
-        for (unsigned I = 0; I < 2; ++I)
-          for (unsigned J = 0; J < NumVectors; ++J)
-            DeinterleavedValues[NumVectors * I + J] =
-                State.Builder.CreateExtractValue(TempDeinterleavedValues[J], I);
-      }
-
-#ifndef NDEBUG
-      for (Value *Val : DeinterleavedValues)
-        assert(Val && "NULL Deinterleaved Value");
-#endif
-      for (unsigned I = 0, J = 0; I < InterleaveFactor; ++I) {
+      // Scalable vectors cannot use arbitrary shufflevectors (only splats),
+      // so must use intrinsics to deinterleave.
+      Value *DI = State.Builder.CreateIntrinsic(
+          Intrinsic::vector_deinterleave2, VecTy, NewLoad,
+          /*FMFSource=*/nullptr, "strided.vec");
+      unsigned J = 0;
+      for (unsigned I = 0; I < InterleaveFactor; ++I) {
         Instruction *Member = Group->getMember(I);
-        Value *StridedVec = DeinterleavedValues[I];
-        if (!Member) {
-          // This value is not needed as it's not used
-          cast<Instruction>(StridedVec)->eraseFromParent();
+
+        if (!Member)
           continue;
-        }
+
+        Value *StridedVec = State.Builder.CreateExtractValue(DI, I);
         // If this member has different type, cast the result type.
         if (Member->getType() != ScalarTy) {
           VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index b5d5e27afa17a..49a86134411d6 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -59,6 +59,9 @@
 ; CHECK-NEXT:       Block Frequency Analysis
 ; CHECK-NEXT:       Constant Hoisting
 ; CHECK-NEXT:       Replace intrinsics with calls to vector library
+; CHECK-NEXT:       Lazy Branch Probability Analysis
+; CHECK-NEXT:       Lazy Block Frequency Analysis
+; CHECK-NEXT:       Optimization Remark Emitter
 ; CHECK-NEXT:       Partially inline calls to library functions
 ; CHECK-NEXT:       Instrument function entry/exit with calls to e.g. mcount() (post inlining)
 ; CHECK-NEXT:       Scalarize Masked Memory Intrinsics
diff --git a/llvm/test/CodeGen/AArch64/cfguard-arm64ec.ll b/llvm/test/CodeGen/AArch64/cfguard-arm64ec.ll
new file mode 100644
index 0000000000000..bdbc99e2d98b0
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cfguard-arm64ec.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=arm64ec-pc-windows-msvc | FileCheck %s
+
+declare void @called()
+declare void @escaped()
+define void @f(ptr %dst) {
+  call void @called()
+  store ptr @escaped, ptr %dst
+  ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 2, !"cfguard", i32 1}
+
+; CHECK-LABEL: .section .gfids$y,"dr"
+; CHECK-NEXT:  .symidx escaped
+; CHECK-NOT:   .symidx
diff --git a/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll b/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
index 12bd2db2297d7..8345fdfa46b4c 100644
--- a/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
+++ b/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
@@ -58,10 +58,11 @@ define i128 @extract_icmp_v1i128(ptr %p) {
 ; CHECK-LABEL: extract_icmp_v1i128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp x9, x8, [x0]
-; CHECK-NEXT:    mov x1, xzr
 ; CHECK-NEXT:    orr x8, x9, x8
 ; CHECK-NEXT:    cmp x8, #0
-; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    cset w8, eq
+; CHECK-NEXT:    sbfx x0, x8, #0, #1
+; CHECK-NEXT:    mov x1, x0
 ; CHECK-NEXT:    ret
   %load = load <1 x i128>, ptr %p, align 16
   %cmp = icmp eq <1 x i128> %load, zeroinitializer
@@ -141,6 +142,26 @@ for.cond.cleanup:
 }
 
 
+; TODO: Combine the sbfx(cset) into a csetm
+define i32 @issue_121372(<4 x i32> %v) {
+; CHECK-LABEL: issue_121372:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    cset w8, eq
+; CHECK-NEXT:    sbfx w8, w8, #0, #1
+; CHECK-NEXT:    cmp w8, #1
+; CHECK-NEXT:    csetm w0, lt
+; CHECK-NEXT:    ret
+  %cmp_ule = icmp ule <4 x i32> %v, zeroinitializer
+  %sext_v4i1 = sext <4 x i1> %cmp_ule to <4 x i32>
+  %cmp_sge = icmp sge <4 x i32> zeroinitializer, %sext_v4i1
+  %ext = extractelement <4 x i1> %cmp_sge, i32 0
+  %res = sext i1 %ext to i32
+  ret i32 %res
+}
+
+
 ; Negative tests
 
 define i1 @extract_icmp_v4i32_splat_rhs(<4 x i32> %a, i32 %b) {
@@ -163,9 +184,9 @@ define i1 @extract_icmp_v4i32_splat_rhs_mul_use(<4 x i32> %a, ptr %p) {
 ; CHECK-LABEL: extract_icmp_v4i32_splat_rhs_mul_use:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.4s, #235
-; CHECK-NEXT:    adrp x9, .LCPI7_0
+; CHECK-NEXT:    adrp x9, .LCPI8_0
 ; CHECK-NEXT:    mov x8, x0
-; CHECK-NEXT:    ldr q2, [x9, :lo12:.LCPI7_0]
+; CHECK-NEXT:    ldr q2, [x9, :lo12:.LCPI8_0]
 ; CHECK-NEXT:    cmhi v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    xtn v1.4h, v0.4s
 ; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
diff --git a/llvm/test/CodeGen/AArch64/outlining-with-streaming-mode-changes.ll b/llvm/test/CodeGen/AArch64/outlining-with-streaming-mode-changes.ll
index aae1a668b85fb..94fe06733347a 100644
--- a/llvm/test/CodeGen/AArch64/outlining-with-streaming-mode-changes.ll
+++ b/llvm/test/CodeGen/AArch64/outlining-with-streaming-mode-changes.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -enable-machine-outliner -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -enable-machine-outliner -verify-machineinstrs < %s | FileCheck %s -check-prefix=OUTLINER
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -enable-machine-outliner -aarch64-streaming-hazard-size=0 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -enable-machine-outliner -aarch64-streaming-hazard-size=0 -verify-machineinstrs < %s | FileCheck %s -check-prefix=OUTLINER
 
 declare void @callee();
 
diff --git a/llvm/test/CodeGen/AArch64/sme-callee-save-restore-pairs.ll b/llvm/test/CodeGen/AArch64/sme-callee-save-restore-pairs.ll
index cf490021026e0..980144d6ca584 100644
--- a/llvm/test/CodeGen/AArch64/sme-callee-save-restore-pairs.ll
+++ b/llvm/test/CodeGen/AArch64/sme-callee-save-restore-pairs.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -mattr=+sve -aarch64-disable-multivector-spill-fill -verify-machineinstrs < %s | FileCheck %s --check-prefixes=NOPAIR
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -mattr=+sve -verify-machineinstrs < %s | FileCheck %s --check-prefixes=NOPAIR
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2  -mattr=+sve -verify-machineinstrs < %s | FileCheck %s --check-prefixes=PAIR
+; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sme2 -mattr=+sve -aarch64-disable-multivector-spill-fill -verify-machineinstrs < %s | FileCheck %s --check-prefixes=NOPAIR
+; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sme -mattr=+sve -verify-machineinstrs < %s | FileCheck %s --check-prefixes=NOPAIR
+; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sme2  -mattr=+sve -verify-machineinstrs < %s | FileCheck %s --check-prefixes=PAIR
 
 declare void @my_func()
 declare void @my_func2(<vscale x 16 x i8> %v)
diff --git a/llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll b/llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll
index cad529062102c..a08e4896f5ee9 100644
--- a/llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll
+++ b/llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-darwin -mattr=+sve -mattr=+sme -enable-aarch64-sme-peephole-opt=false -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-darwin -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme -enable-aarch64-sme-peephole-opt=false -verify-machineinstrs < %s | FileCheck %s
 
 declare void @normal_callee();
 
diff --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
index fc0208d605dd7..33d08beae2ca7 100644
--- a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
+++ b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -fast-isel=true -global-isel=false -fast-isel-abort=0 -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 < %s \
+; RUN: llc -fast-isel=true -aarch64-streaming-hazard-size=0 -global-isel=false -fast-isel-abort=0 -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 < %s \
 ; RUN:     | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-FISEL
-; RUN: llc -fast-isel=false -global-isel=true -global-isel-abort=0 -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 < %s \
+; RUN: llc -fast-isel=false -aarch64-streaming-hazard-size=0 -global-isel=true -global-isel-abort=0 -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 < %s \
 ; RUN:     | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-GISEL
 
 
diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
index 4ade335c254dc..e463e833bdbde 100644
--- a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
+++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64 -mattr=+sve -mattr=+sme < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64 -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme < %s | FileCheck %s
 
 declare void @private_za_callee()
 declare float @llvm.cos.f32(float)
diff --git a/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll b/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll
index cb8a825a201ad..83437c9eb076e 100644
--- a/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll
+++ b/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve,+sme2 < %s | FileCheck %s
 
 declare void @callee()
 declare void @callee_farg(float)
diff --git a/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll b/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
index 500c51159dd91..de6d59801b078 100644
--- a/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
+++ b/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -aarch64-streaming-hazard-size=0 < %s | FileCheck %s
 
 target triple = "aarch64-unknown-unknown-eabi-elf"
 
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll
index 6c8aff585808f..1a49da84c00ce 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s
 
 declare void @normal_callee();
 declare void @streaming_callee() "aarch64_pstate_sm_enabled";
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-body.ll b/llvm/test/CodeGen/AArch64/sme-streaming-body.ll
index 572b1fff3520a..dd336e0f2e686 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-body.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-body.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s
 
 declare void @normal_callee();
 declare void @streaming_callee() "aarch64_pstate_sm_enabled";
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
index 58992eb2d592a..e967f3b7be5e8 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -mattr=+sve -mattr=+sme < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -verify-machineinstrs -mattr=+sve -mattr=+sme < %s | FileCheck %s
 
 ; This file tests the following combinations related to streaming-enabled functions:
 ; [ ] N  ->  SC    (Normal -> Streaming-compatible)
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll
index bd0734df9e23e..cd133e946f04c 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
 
 ; This file tests the following combinations related to streaming-enabled functions:
 ; [ ] N  ->  S    (Normal -> Streaming)
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll
index 803bb9fda458b..fe3f493353b50 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -aarch64-streaming-hazard-size=0 < %s | FileCheck %s
 
 target triple = "aarch64"
 
diff --git a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
index 5adeef3ab7455..17d689d2c9eb5 100644
--- a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
+++ b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
@@ -1,7 +1,7 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -frame-pointer=non-leaf -verify-machineinstrs < %s | FileCheck %s --check-prefix=FP-CHECK
+; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -frame-pointer=non-leaf -verify-machineinstrs < %s | FileCheck %s --check-prefix=FP-CHECK
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -frame-pointer=non-leaf -verify-machineinstrs < %s | FileCheck %s --check-prefix=NO-SVE-CHECK
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -verify-machineinstrs -enable-machine-outliner < %s | FileCheck %s --check-prefix=OUTLINER-CHECK
+; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -verify-machineinstrs -enable-machine-outliner < %s | FileCheck %s --check-prefix=OUTLINER-CHECK
 
 declare void @callee();
 declare void @fixed_callee(<4 x i32>);
diff --git a/llvm/test/CodeGen/AArch64/stack-hazard-defaults.ll b/llvm/test/CodeGen/AArch64/stack-hazard-defaults.ll
new file mode 100644
index 0000000000000..21dfe2bb19591
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/stack-hazard-defaults.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sme -aarch64-stack-hazard-size=0 | FileCheck %s --check-prefix=CHECK0
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sme -aarch64-stack-hazard-size=1024 | FileCheck %s --check-prefix=CHECK1024
+
+;; The following run lines check the default values for aarch64-stack-hazard-size/aarch64-streaming-hazard-size.
+
+;; When both +sme and +sve are set (and +sme-fa64 is not), the hazard size should default to 1024.
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sme -mattr=+sve | FileCheck %s --check-prefix=CHECK1024
+
+;; The 1024-byte default can still be overridden (here disabled with -aarch64-stack-hazard-size=0) when +sme,+sve is set.
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sme -mattr=+sve -aarch64-stack-hazard-size=0 | FileCheck %s --check-prefix=CHECK0
+
+;; When +sme-fa64 is set alongside +sme,+sve the default hazard size should be 0.
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sme-fa64 -mattr=+sme -mattr=+sve | FileCheck %s --check-prefix=CHECK0
+
+;; When +sme is set (without +sve) the default hazard size should be 0.
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sme | FileCheck %s --check-prefix=CHECK0
+
+define i32 @spill_fpr_with_gpr_stack_object(i64 %d) "aarch64_pstate_sm_compatible" {
+; CHECK0-LABEL: spill_fpr_with_gpr_stack_object:
+; CHECK0:       // %bb.0: // %entry
+; CHECK0-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK0-NEXT:    .cfi_offset b8, -16
+; CHECK0-NEXT:    mov x8, x0
+; CHECK0-NEXT:    mov w0, wzr
+; CHECK0-NEXT:    //APP
+; CHECK0-NEXT:    //NO_APP
+; CHECK0-NEXT:    str x8, [sp, #8]
+; CHECK0-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK0-NEXT:    ret
+;
+; CHECK1024-LABEL: spill_fpr_with_gpr_stack_object:
+; CHECK1024:       // %bb.0: // %entry
+; CHECK1024-NEXT:    sub sp, sp, #1040
+; CHECK1024-NEXT:    str d8, [sp] // 8-byte Folded Spill
+; CHECK1024-NEXT:    str x29, [sp, #1032] // 8-byte Folded Spill
+; CHECK1024-NEXT:    sub sp, sp, #1040
+; CHECK1024-NEXT:    .cfi_def_cfa_offset 2080
+; CHECK1024-NEXT:    .cfi_offset w29, -8
+; CHECK1024-NEXT:    .cfi_offset b8, -1040
+; CHECK1024-NEXT:    mov x8, x0
+; CHECK1024-NEXT:    mov w0, wzr
+; CHECK1024-NEXT:    //APP
+; CHECK1024-NEXT:    //NO_APP
+; CHECK1024-NEXT:    str x8, [sp, #8]
+; CHECK1024-NEXT:    add sp, sp, #1040
+; CHECK1024-NEXT:    ldr x29, [sp, #1032] // 8-byte Folded Reload
+; CHECK1024-NEXT:    ldr d8, [sp] // 8-byte Folded Reload
+; CHECK1024-NEXT:    add sp, sp, #1040
+; CHECK1024-NEXT:    ret
+entry:
+  %a = alloca i64
+  tail call void asm sideeffect "", "~{d8}"() #1
+  store i64 %d, ptr %a
+  ret i32 0
+}
diff --git a/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll b/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll
index 20faeb23eed59..f1e684c86e896 100644
--- a/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll
+++ b/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -verify-machineinstrs -aarch64-lower-to-sme-routines=false < %s | FileCheck %s -check-prefixes=CHECK-NO-SME-ROUTINES
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -mattr=+mops -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK-MOPS
+; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK
+; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -verify-machineinstrs -aarch64-lower-to-sme-routines=false < %s | FileCheck %s -check-prefixes=CHECK-NO-SME-ROUTINES
+; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -mattr=+mops -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK-MOPS
 
 @dst = global [512 x i8] zeroinitializer, align 1
 @src = global [512 x i8] zeroinitializer, align 1
diff --git a/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll b/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
index ec94198a08ca7..b4efbecb7f8bb 100644
--- a/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
+++ b/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 | FileCheck %s --check-prefixes=CHECK
-; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -pass-remarks-analysis=stack-frame-layout 2>&1 >/dev/null | FileCheck %s --check-prefixes=CHECK-FRAMELAYOUT
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-streaming-hazard-size=0 | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-streaming-hazard-size=0 -pass-remarks-analysis=stack-frame-layout 2>&1 >/dev/null | FileCheck %s --check-prefixes=CHECK-FRAMELAYOUT
 
 ; CHECK-FRAMELAYOUT-LABEL: Function: csr_d8_allocnxv4i32i32f64
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
diff --git a/llvm/test/CodeGen/AArch64/zeroing-forms-counts-not.ll b/llvm/test/CodeGen/AArch64/zeroing-forms-counts-not.ll
new file mode 100644
index 0000000000000..f7970ca81f608
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/zeroing-forms-counts-not.ll
@@ -0,0 +1,2048 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mattr=+bf16,+sve    < %s | FileCheck %s
+; RUN: llc -mattr=+bf16,+sve2p2 < %s | FileCheck %s -check-prefix CHECK-2p2
+
+; RUN: llc -mattr=+bf16,+sme    -force-streaming < %s | FileCheck %s
+; RUN: llc -mattr=+bf16,+sme2p2 -force-streaming < %s | FileCheck %s -check-prefix CHECK-2p2
+
+target triple = "aarch64-linux"
+
+define <vscale x 16 x i8> @test_svcls_s8_x_1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svcls_s8_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cls z0.b, p0/m, z0.b
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcls_s8_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cls z0.b, p0/z, z0.b
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+  ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svcls_s8_x_2(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svcls_s8_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cls z0.b, p0/m, z1.b
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcls_s8_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cls z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+  ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svcls_s8_z(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svcls_s8_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.b, #0 // =0x0
+; CHECK-NEXT:    cls z0.b, p0/m, z1.b
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcls_s8_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cls z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+  ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 8 x i16> @test_svcls_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcls_s16_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cls z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcls_s16_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cls z0.h, p0/z, z0.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcls_s16_x_2(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcls_s16_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cls z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcls_s16_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cls z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcls_s16_z(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcls_s16_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.h, #0 // =0x0
+; CHECK-NEXT:    cls z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcls_s16_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cls z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svcls_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcls_s32_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cls z0.s, p0/m, z0.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcls_s32_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cls z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svcls_s32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcls_s32_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cls z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcls_s32_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cls z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svcls_s32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcls_s32_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.s, #0 // =0x0
+; CHECK-NEXT:    cls z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcls_s32_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cls z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svcls_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcls_s64_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cls z0.d, p0/m, z0.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcls_s64_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cls z0.d, p0/z, z0.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svcls_s64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcls_s64_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cls z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcls_s64_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cls z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svcls_s64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcls_s64_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.d, #0 // =0x0
+; CHECK-NEXT:    cls z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcls_s64_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cls z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 16 x i8> @test_svclz_s8_x_1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svclz_s8_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    clz z0.b, p0/m, z0.b
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svclz_s8_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    clz z0.b, p0/z, z0.b
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.clz.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+  ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svclz_s8_x_2(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svclz_s8_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    clz z0.b, p0/m, z1.b
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svclz_s8_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    clz z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.clz.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+  ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svclz_s8_z(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svclz_s8_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.b, #0 // =0x0
+; CHECK-NEXT:    clz z0.b, p0/m, z1.b
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svclz_s8_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    clz z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.clz.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+  ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 8 x i16> @test_svclz_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svclz_s16_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    clz z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svclz_s16_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    clz z0.h, p0/z, z0.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.clz.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svclz_s16_x_2(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svclz_s16_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    clz z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svclz_s16_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    clz z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.clz.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svclz_s16_z(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svclz_s16_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.h, #0 // =0x0
+; CHECK-NEXT:    clz z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svclz_s16_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    clz z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.clz.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svclz_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svclz_s32_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    clz z0.s, p0/m, z0.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svclz_s32_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    clz z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svclz_s32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svclz_s32_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    clz z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svclz_s32_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    clz z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svclz_s32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svclz_s32_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.s, #0 // =0x0
+; CHECK-NEXT:    clz z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svclz_s32_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    clz z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svclz_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svclz_s64_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    clz z0.d, p0/m, z0.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svclz_s64_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    clz z0.d, p0/z, z0.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.clz.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svclz_s64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svclz_s64_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    clz z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svclz_s64_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    clz z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.clz.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svclz_s64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svclz_s64_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.d, #0 // =0x0
+; CHECK-NEXT:    clz z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svclz_s64_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    clz z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.clz.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 16 x i8> @test_svcnt_s8_x_1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svcnt_s8_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cnt z0.b, p0/m, z0.b
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnt_s8_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnt z0.b, p0/z, z0.b
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+  ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svcnt_s8_x_2(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svcnt_s8_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cnt z0.b, p0/m, z1.b
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnt_s8_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnt z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+  ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svcnt_s8_z(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svcnt_s8_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.b, #0 // =0x0
+; CHECK-NEXT:    cnt z0.b, p0/m, z1.b
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnt_s8_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnt z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+  ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcnt_s16_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cnt z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnt_s16_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnt z0.h, p0/z, z0.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_s16_x_2(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcnt_s16_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cnt z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnt_s16_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnt z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_s16_z(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcnt_s16_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.h, #0 // =0x0
+; CHECK-NEXT:    cnt z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnt_s16_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnt z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svcnt_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcnt_s32_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cnt z0.s, p0/m, z0.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnt_s32_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnt z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svcnt_s32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcnt_s32_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cnt z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnt_s32_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnt z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svcnt_s32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcnt_s32_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.s, #0 // =0x0
+; CHECK-NEXT:    cnt z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnt_s32_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnt z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svcnt_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcnt_s64_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cnt z0.d, p0/m, z0.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnt_s64_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnt z0.d, p0/z, z0.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svcnt_s64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcnt_s64_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cnt z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+; Poison passthru with %x in z1: movprfx + merging cnt above; the 2p2 checks below expect a single zeroing cnt (p0/z).
+; CHECK-2p2-LABEL: test_svcnt_s64_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnt z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svcnt_s64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcnt_s64_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.d, #0 // =0x0
+; CHECK-NEXT:    cnt z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+; Zeroinitializer passthru: z0 is cleared, then merging cnt above; the 2p2 checks below expect a single zeroing cnt (p0/z).
+; CHECK-2p2-LABEL: test_svcnt_s64_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnt z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_f16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svcnt_f16_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cnt z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+; Poison passthru with %x already in z0: merging cnt in place above; the 2p2 checks below expect the zeroing form (p0/z).
+; CHECK-2p2-LABEL: test_svcnt_f16_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnt z0.h, p0/z, z0.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8f16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_f16_x_2(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svcnt_f16_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cnt z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+; Poison passthru with %x in z1: movprfx + merging cnt above; the 2p2 checks below expect a single zeroing cnt (p0/z).
+; CHECK-2p2-LABEL: test_svcnt_f16_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnt z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8f16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_f16_z(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svcnt_f16_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.h, #0 // =0x0
+; CHECK-NEXT:    cnt z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+; Zeroinitializer passthru: z0 is cleared, then merging cnt above; the 2p2 checks below expect a single zeroing cnt (p0/z).
+; CHECK-2p2-LABEL: test_svcnt_f16_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnt z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8f16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_bf16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %x) {
+; CHECK-LABEL: test_svcnt_bf16_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cnt z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+; Poison passthru with %x already in z0: merging cnt in place above; the 2p2 checks below expect the zeroing form (p0/z).
+; CHECK-2p2-LABEL: test_svcnt_bf16_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnt z0.h, p0/z, z0.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %x)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_bf16_x_2(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x bfloat> %x) {
+; CHECK-LABEL: test_svcnt_bf16_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cnt z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+; Poison passthru with %x in z1: movprfx + merging cnt above; the 2p2 checks below expect a single zeroing cnt (p0/z).
+; CHECK-2p2-LABEL: test_svcnt_bf16_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnt z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %x)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_bf16_z(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x bfloat> %x) {
+; CHECK-LABEL: test_svcnt_bf16_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.h, #0 // =0x0
+; CHECK-NEXT:    cnt z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+; Zeroinitializer passthru: z0 is cleared, then merging cnt above; the 2p2 checks below expect a single zeroing cnt (p0/z).
+; CHECK-2p2-LABEL: test_svcnt_bf16_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnt z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %x)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svcnt_f32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svcnt_f32_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cnt z0.s, p0/m, z0.s
+; CHECK-NEXT:    ret
+; Poison passthru with %x already in z0: merging cnt in place above; the 2p2 checks below expect the zeroing form (p0/z).
+; CHECK-2p2-LABEL: test_svcnt_f32_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnt z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4f32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svcnt_f32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svcnt_f32_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cnt z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+; Poison passthru with %x in z1: movprfx + merging cnt above; the 2p2 checks below expect a single zeroing cnt (p0/z).
+; CHECK-2p2-LABEL: test_svcnt_f32_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnt z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4f32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svcnt_f32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svcnt_f32_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.s, #0 // =0x0
+; CHECK-NEXT:    cnt z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+; Zeroinitializer passthru: z0 is cleared, then merging cnt above; the 2p2 checks below expect a single zeroing cnt (p0/z).
+; CHECK-2p2-LABEL: test_svcnt_f32_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnt z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4f32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svcnt_f64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svcnt_f64_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cnt z0.d, p0/m, z0.d
+; CHECK-NEXT:    ret
+; Poison passthru with %x already in z0: merging cnt in place above; the 2p2 checks below expect the zeroing form (p0/z).
+; CHECK-2p2-LABEL: test_svcnt_f64_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnt z0.d, p0/z, z0.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2f64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svcnt_f64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svcnt_f64_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cnt z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+; Poison passthru with %x in z1: movprfx + merging cnt above; the 2p2 checks below expect a single zeroing cnt (p0/z).
+; CHECK-2p2-LABEL: test_svcnt_f64_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnt z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2f64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svcnt_f64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svcnt_f64_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.d, #0 // =0x0
+; CHECK-NEXT:    cnt z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+; Zeroinitializer passthru: z0 is cleared, then merging cnt above; the 2p2 checks below expect a single zeroing cnt (p0/z).
+; CHECK-2p2-LABEL: test_svcnt_f64_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnt z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2f64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 16 x i8> @test_svcnot_s8_x_1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svcnot_s8_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cnot z0.b, p0/m, z0.b
+; CHECK-NEXT:    ret
+; Poison passthru with %x already in z0: merging cnot in place above; the 2p2 checks below expect the zeroing form (p0/z).
+; CHECK-2p2-LABEL: test_svcnot_s8_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnot z0.b, p0/z, z0.b
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnot.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+  ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svcnot_s8_x_2(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svcnot_s8_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cnot z0.b, p0/m, z1.b
+; CHECK-NEXT:    ret
+; Poison passthru with %x in z1: movprfx + merging cnot above; the 2p2 checks below expect a single zeroing cnot (p0/z).
+; CHECK-2p2-LABEL: test_svcnot_s8_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnot z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnot.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+  ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svcnot_s8_z(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svcnot_s8_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.b, #0 // =0x0
+; CHECK-NEXT:    cnot z0.b, p0/m, z1.b
+; CHECK-NEXT:    ret
+; Zeroinitializer passthru: z0 is cleared, then merging cnot above; the 2p2 checks below expect a single zeroing cnot (p0/z).
+; CHECK-2p2-LABEL: test_svcnot_s8_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnot z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnot.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+  ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 8 x i16> @test_svcnot_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcnot_s16_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cnot z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+; Poison passthru with %x already in z0: merging cnot in place above; the 2p2 checks below expect the zeroing form (p0/z).
+; CHECK-2p2-LABEL: test_svcnot_s16_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnot z0.h, p0/z, z0.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnot.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcnot_s16_x_2(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcnot_s16_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cnot z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+; Poison passthru with %x in z1: movprfx + merging cnot above; the 2p2 checks below expect a single zeroing cnot (p0/z).
+; CHECK-2p2-LABEL: test_svcnot_s16_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnot z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnot.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcnot_s16_z(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcnot_s16_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.h, #0 // =0x0
+; CHECK-NEXT:    cnot z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+; Zeroinitializer passthru: z0 is cleared, then merging cnot above; the 2p2 checks below expect a single zeroing cnot (p0/z).
+; CHECK-2p2-LABEL: test_svcnot_s16_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnot z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnot.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svcnot_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcnot_s32_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cnot z0.s, p0/m, z0.s
+; CHECK-NEXT:    ret
+; Poison passthru with %x already in z0: merging cnot in place above; the 2p2 checks below expect the zeroing form (p0/z).
+; CHECK-2p2-LABEL: test_svcnot_s32_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnot z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svcnot_s32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcnot_s32_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cnot z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+; Poison passthru with %x in z1: movprfx + merging cnot above; the 2p2 checks below expect a single zeroing cnot (p0/z).
+; CHECK-2p2-LABEL: test_svcnot_s32_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnot z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svcnot_s32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcnot_s32_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.s, #0 // =0x0
+; CHECK-NEXT:    cnot z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+; Zeroinitializer passthru: z0 is cleared, then merging cnot above; the 2p2 checks below expect a single zeroing cnot (p0/z).
+; CHECK-2p2-LABEL: test_svcnot_s32_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnot z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svcnot_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcnot_s64_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cnot z0.d, p0/m, z0.d
+; CHECK-NEXT:    ret
+; Poison passthru with %x already in z0: merging cnot in place above; the 2p2 checks below expect the zeroing form (p0/z).
+; CHECK-2p2-LABEL: test_svcnot_s64_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnot z0.d, p0/z, z0.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnot.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svcnot_s64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcnot_s64_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cnot z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+; Poison passthru with %x in z1: movprfx + merging cnot above; the 2p2 checks below expect a single zeroing cnot (p0/z).
+; CHECK-2p2-LABEL: test_svcnot_s64_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnot z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnot.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svcnot_s64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcnot_s64_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.d, #0 // =0x0
+; CHECK-NEXT:    cnot z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+; Zeroinitializer passthru: z0 is cleared, then merging cnot above; the 2p2 checks below expect a single zeroing cnot (p0/z).
+; CHECK-2p2-LABEL: test_svcnot_s64_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    cnot z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnot.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 16 x i8> @test_svnot_s8_x_1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svnot_s8_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    not z0.b, p0/m, z0.b
+; CHECK-NEXT:    ret
+; Poison passthru with %x already in z0: merging not in place above; the 2p2 checks below expect the zeroing form (p0/z).
+; CHECK-2p2-LABEL: test_svnot_s8_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    not z0.b, p0/z, z0.b
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.not.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+  ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svnot_s8_x_2(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svnot_s8_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    not z0.b, p0/m, z1.b
+; CHECK-NEXT:    ret
+; Poison passthru with %x in z1: movprfx + merging not above; the 2p2 checks below expect a single zeroing not (p0/z).
+; CHECK-2p2-LABEL: test_svnot_s8_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    not z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.not.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+  ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svnot_s8_z(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svnot_s8_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.b, #0 // =0x0
+; CHECK-NEXT:    not z0.b, p0/m, z1.b
+; CHECK-NEXT:    ret
+; Zeroinitializer passthru: z0 is cleared, then merging not above; the 2p2 checks below expect a single zeroing not (p0/z).
+; CHECK-2p2-LABEL: test_svnot_s8_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    not z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.not.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+  ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 8 x i16> @test_svnot_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svnot_s16_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    not z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+; Poison passthru with %x already in z0: merging not in place above; the 2p2 checks below expect the zeroing form (p0/z).
+; CHECK-2p2-LABEL: test_svnot_s16_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    not z0.h, p0/z, z0.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.not.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svnot_s16_x_2(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svnot_s16_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    not z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+; Poison passthru with %x in z1: movprfx + merging not above; the 2p2 checks below expect a single zeroing not (p0/z).
+; CHECK-2p2-LABEL: test_svnot_s16_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    not z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.not.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svnot_s16_z(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svnot_s16_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.h, #0 // =0x0
+; CHECK-NEXT:    not z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+; Zeroinitializer passthru: z0 is cleared, then merging not above; the 2p2 checks below expect a single zeroing not (p0/z).
+; CHECK-2p2-LABEL: test_svnot_s16_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    not z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.not.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svnot_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svnot_s32_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    not z0.s, p0/m, z0.s
+; CHECK-NEXT:    ret
+; Poison passthru with %x already in z0: merging not in place above; the 2p2 checks below expect the zeroing form (p0/z).
+; CHECK-2p2-LABEL: test_svnot_s32_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    not z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.not.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svnot_s32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svnot_s32_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    not z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+; Poison passthru with %x in z1: movprfx + merging not above; the 2p2 checks below expect a single zeroing not (p0/z).
+; CHECK-2p2-LABEL: test_svnot_s32_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    not z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.not.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svnot_s32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svnot_s32_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.s, #0 // =0x0
+; CHECK-NEXT:    not z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+; Zeroinitializer passthru: z0 is cleared, then merging not above; the 2p2 checks below expect a single zeroing not (p0/z).
+; CHECK-2p2-LABEL: test_svnot_s32_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    not z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.not.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svnot_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svnot_s64_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    not z0.d, p0/m, z0.d
+; CHECK-NEXT:    ret
+; Poison passthru with %x already in z0: merging not in place above; the 2p2 checks below expect the zeroing form (p0/z).
+; CHECK-2p2-LABEL: test_svnot_s64_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    not z0.d, p0/z, z0.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.not.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svnot_s64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svnot_s64_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    not z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+; Poison passthru with %x in z1: movprfx + merging not above; the 2p2 checks below expect a single zeroing not (p0/z).
+; CHECK-2p2-LABEL: test_svnot_s64_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    not z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.not.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svnot_s64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svnot_s64_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.d, #0 // =0x0
+; CHECK-NEXT:    not z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+; Zeroinitializer passthru: z0 is cleared, then merging not above; the 2p2 checks below expect a single zeroing not (p0/z).
+; CHECK-2p2-LABEL: test_svnot_s64_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    not z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.not.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 16 x i8> @test_svcls_nxv16i8_ptrue_u(double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svcls_nxv16i8_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cls z0.b, p0/m, z1.b
+; CHECK-NEXT:    ret
+; ptrue(i32 31) predicate, poison passthru: movprfx + merging cls above; the 2p2 checks below expect a zeroing cls (p0/z).
+; CHECK-2p2-LABEL: test_svcls_nxv16i8_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.b
+; CHECK-2p2-NEXT:    cls z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+  ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svcls_nxv16i8_ptrue(double %z0, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
+; CHECK-LABEL: test_svcls_nxv16i8_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    cls z0.b, p0/m, z2.b
+; CHECK-NEXT:    ret
+; ptrue(i32 31) predicate, %x passthru: neither expected sequence reads %x; movprfx from %y above, zeroing cls (p0/z) in the 2p2 checks below.
+; CHECK-2p2-LABEL: test_svcls_nxv16i8_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.b
+; CHECK-2p2-NEXT:    cls z0.b, p0/z, z2.b
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %y)
+  ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 8 x i16> @test_svcls_nxv8i16_ptrue_u(double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcls_nxv8i16_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cls z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+; ptrue(i32 31) predicate, poison passthru: movprfx + merging cls above; the 2p2 checks below expect a zeroing cls (p0/z).
+; CHECK-2p2-LABEL: test_svcls_nxv8i16_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.h
+; CHECK-2p2-NEXT:    cls z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcls_nxv8i16_ptrue(double %z0, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
+; CHECK-LABEL: test_svcls_nxv8i16_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    cls z0.h, p0/m, z2.h
+; CHECK-NEXT:    ret
+; ptrue(i32 31) predicate, %x passthru: neither expected sequence reads %x; movprfx from %y above, zeroing cls (p0/z) in the 2p2 checks below.
+; CHECK-2p2-LABEL: test_svcls_nxv8i16_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.h
+; CHECK-2p2-NEXT:    cls z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %y)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svcls_nxv4i32_ptrue_u(double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcls_nxv4i32_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cls z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+; ptrue(i32 31) predicate, poison passthru: movprfx + merging cls above; the 2p2 checks below expect a zeroing cls (p0/z).
+; CHECK-2p2-LABEL: test_svcls_nxv4i32_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.s
+; CHECK-2p2-NEXT:    cls z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svcls_nxv4i32_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: test_svcls_nxv4i32_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    cls z0.s, p0/m, z2.s
+; CHECK-NEXT:    ret
+; ptrue(i32 31) predicate, %x passthru: neither expected sequence reads %x; movprfx from %y above, zeroing cls (p0/z) in the 2p2 checks below.
+; CHECK-2p2-LABEL: test_svcls_nxv4i32_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.s
+; CHECK-2p2-NEXT:    cls z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %y)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svcls_nxv2i64_ptrue_u(double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcls_nxv2i64_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cls z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+; ptrue(i32 31) predicate, poison passthru: movprfx + merging cls above; the 2p2 checks below expect a zeroing cls (p0/z).
+; CHECK-2p2-LABEL: test_svcls_nxv2i64_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.d
+; CHECK-2p2-NEXT:    cls z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svcls_nxv2i64_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
+; CHECK-LABEL: test_svcls_nxv2i64_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    cls z0.d, p0/m, z2.d
+; CHECK-NEXT:    ret
+; ptrue(i32 31) predicate, %x passthru: neither expected sequence reads %x; movprfx from %y above, zeroing cls (p0/z) in the 2p2 checks below.
+; CHECK-2p2-LABEL: test_svcls_nxv2i64_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.d
+; CHECK-2p2-NEXT:    cls z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 16 x i8> @test_svclz_nxv16i8_ptrue_u(double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svclz_nxv16i8_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    clz z0.b, p0/m, z1.b
+; CHECK-NEXT:    ret
+; ptrue(i32 31) predicate, poison passthru: movprfx + merging clz above; the 2p2 checks below expect a zeroing clz (p0/z).
+; CHECK-2p2-LABEL: test_svclz_nxv16i8_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.b
+; CHECK-2p2-NEXT:    clz z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.clz.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+  ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svclz_nxv16i8_ptrue(double %z0, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
+; CHECK-LABEL: test_svclz_nxv16i8_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    clz z0.b, p0/m, z2.b
+; CHECK-NEXT:    ret
+; ptrue(i32 31) predicate, %x passthru: neither expected sequence reads %x; movprfx from %y above, zeroing clz (p0/z) in the 2p2 checks below.
+; CHECK-2p2-LABEL: test_svclz_nxv16i8_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.b
+; CHECK-2p2-NEXT:    clz z0.b, p0/z, z2.b
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.clz.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %y)
+  ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 8 x i16> @test_svclz_nxv8i16_ptrue_u(double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svclz_nxv8i16_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    clz z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+; ptrue(i32 31) predicate, poison passthru: movprfx + merging clz above; the 2p2 checks below expect a zeroing clz (p0/z).
+; CHECK-2p2-LABEL: test_svclz_nxv8i16_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.h
+; CHECK-2p2-NEXT:    clz z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.clz.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svclz_nxv8i16_ptrue(double %z0, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
+; CHECK-LABEL: test_svclz_nxv8i16_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    clz z0.h, p0/m, z2.h
+; CHECK-NEXT:    ret
+; ptrue(i32 31) predicate, %x passthru: neither expected sequence reads %x; movprfx from %y above, zeroing clz (p0/z) in the 2p2 checks below.
+; CHECK-2p2-LABEL: test_svclz_nxv8i16_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.h
+; CHECK-2p2-NEXT:    clz z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.clz.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %y)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svclz_nxv4i32_ptrue_u(double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svclz_nxv4i32_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    clz z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+; ptrue(i32 31) predicate, poison passthru: movprfx + merging clz above; the 2p2 checks below expect a zeroing clz (p0/z).
+; CHECK-2p2-LABEL: test_svclz_nxv4i32_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.s
+; CHECK-2p2-NEXT:    clz z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svclz_nxv4i32_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: test_svclz_nxv4i32_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    clz z0.s, p0/m, z2.s
+; CHECK-NEXT:    ret
+; ptrue(i32 31) predicate, %x passthru: neither expected sequence reads %x; movprfx from %y above, zeroing clz (p0/z) in the 2p2 checks below.
+; CHECK-2p2-LABEL: test_svclz_nxv4i32_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.s
+; CHECK-2p2-NEXT:    clz z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %y)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svclz_nxv2i64_ptrue_u(double %z0, <vscale x 2 x i64> %x) { ; clz.d: poison passthru + all-true predicate; the double arg presumably occupies d0/z0 so %x lands in z1
+; CHECK-LABEL: test_svclz_nxv2i64_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    clz z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svclz_nxv2i64_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.d
+; CHECK-2p2-NEXT:    clz z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.clz.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) ; base: movprfx z0, z1 + merging clz; 2p2: one zeroing clz
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svclz_nxv2i64_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 2 x i64> %y) { ; clz.d: passthru %x + all-true predicate; the double arg presumably occupies d0/z0 so %x/%y land in z1/z2
+; CHECK-LABEL: test_svclz_nxv2i64_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    clz z0.d, p0/m, z2.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svclz_nxv2i64_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.d
+; CHECK-2p2-NEXT:    clz z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.clz.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y) ; passthru %x never appears in the expected asm; base: movprfx z0, z2 + merging clz; 2p2: one zeroing clz
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 16 x i8> @test_svcnt_nxv16i8_ptrue_u(double %z0, <vscale x 16 x i8> %x) { ; cnt.b: poison passthru + all-true predicate; the double arg presumably occupies d0/z0 so %x lands in z1
+; CHECK-LABEL: test_svcnt_nxv16i8_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cnt z0.b, p0/m, z1.b
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv16i8_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.b
+; CHECK-2p2-NEXT:    cnt z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x) ; base: movprfx z0, z1 + merging cnt; 2p2: one zeroing cnt
+  ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svcnt_nxv16i8_ptrue(double %z0, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) { ; cnt.b: passthru %x + all-true predicate; the double arg presumably occupies d0/z0 so %x/%y land in z1/z2
+; CHECK-LABEL: test_svcnt_nxv16i8_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    cnt z0.b, p0/m, z2.b
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv16i8_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.b
+; CHECK-2p2-NEXT:    cnt z0.b, p0/z, z2.b
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %y) ; passthru %x never appears in the expected asm; base: movprfx z0, z2 + merging cnt; 2p2: one zeroing cnt
+  ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_nxv8i16_ptrue_u(double %z0, <vscale x 8 x i16> %x) { ; cnt.h: poison passthru + all-true predicate; the double arg presumably occupies d0/z0 so %x lands in z1
+; CHECK-LABEL: test_svcnt_nxv8i16_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cnt z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv8i16_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.h
+; CHECK-2p2-NEXT:    cnt z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x) ; base: movprfx z0, z1 + merging cnt; 2p2: one zeroing cnt
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_nxv8i16_ptrue(double %z0, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) { ; cnt.h: passthru %x + all-true predicate; the double arg presumably occupies d0/z0 so %x/%y land in z1/z2
+; CHECK-LABEL: test_svcnt_nxv8i16_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    cnt z0.h, p0/m, z2.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv8i16_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.h
+; CHECK-2p2-NEXT:    cnt z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %y) ; passthru %x never appears in the expected asm; base: movprfx z0, z2 + merging cnt; 2p2: one zeroing cnt
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svcnt_nxv4i32_ptrue_u(double %z0, <vscale x 4 x i32> %x) { ; cnt.s: poison passthru + all-true predicate; the double arg presumably occupies d0/z0 so %x lands in z1
+; CHECK-LABEL: test_svcnt_nxv4i32_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cnt z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv4i32_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.s
+; CHECK-2p2-NEXT:    cnt z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) ; base: movprfx z0, z1 + merging cnt; 2p2: one zeroing cnt
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svcnt_nxv4i32_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) { ; cnt.s: passthru %x + all-true predicate; the double arg presumably occupies d0/z0 so %x/%y land in z1/z2
+; CHECK-LABEL: test_svcnt_nxv4i32_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    cnt z0.s, p0/m, z2.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv4i32_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.s
+; CHECK-2p2-NEXT:    cnt z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %y) ; passthru %x never appears in the expected asm; base: movprfx z0, z2 + merging cnt; 2p2: one zeroing cnt
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svcnt_nxv2i64_ptrue_u(double %z0, <vscale x 2 x i64> %x) { ; cnt.d: poison passthru + all-true predicate; the double arg presumably occupies d0/z0 so %x lands in z1
+; CHECK-LABEL: test_svcnt_nxv2i64_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cnt z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv2i64_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.d
+; CHECK-2p2-NEXT:    cnt z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) ; base: movprfx z0, z1 + merging cnt; 2p2: one zeroing cnt
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svcnt_nxv2i64_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 2 x i64> %y) { ; cnt.d: passthru %x + all-true predicate; the double arg presumably occupies d0/z0 so %x/%y land in z1/z2
+; CHECK-LABEL: test_svcnt_nxv2i64_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    cnt z0.d, p0/m, z2.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv2i64_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.d
+; CHECK-2p2-NEXT:    cnt z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y) ; passthru %x never appears in the expected asm; base: movprfx z0, z2 + merging cnt; 2p2: one zeroing cnt
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_nxv8f16_ptrue_u(double %z0, <vscale x 8 x half> %x) { ; cnt.h on half lanes (i16 result): poison passthru + all-true predicate; the double arg presumably occupies d0/z0 so %x lands in z1
+; CHECK-LABEL: test_svcnt_nxv8f16_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cnt z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv8f16_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.h
+; CHECK-2p2-NEXT:    cnt z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8f16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x) ; base: movprfx z0, z1 + merging cnt; 2p2: one zeroing cnt
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_nxv8f16_ptrue(double %z0, <vscale x 8 x i16> %x, <vscale x 8 x half> %y) { ; cnt.h on half lanes (i16 result): passthru %x + all-true predicate; the double arg presumably occupies d0/z0 so %x/%y land in z1/z2
+; CHECK-LABEL: test_svcnt_nxv8f16_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    cnt z0.h, p0/m, z2.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv8f16_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.h
+; CHECK-2p2-NEXT:    cnt z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8f16(<vscale x 8 x i16> %x, <vscale x 8 x i1> %pg, <vscale x 8 x half> %y) ; passthru %x never appears in the expected asm; base: movprfx z0, z2 + merging cnt; 2p2: one zeroing cnt
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_nxv8bf16_ptrue_u(double %z0, <vscale x 8 x bfloat> %x) { ; cnt.h on bfloat lanes (i16 result): poison passthru + all-true predicate; the double arg presumably occupies d0/z0 so %x lands in z1
+; CHECK-LABEL: test_svcnt_nxv8bf16_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cnt z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv8bf16_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.h
+; CHECK-2p2-NEXT:    cnt z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %x) ; base: movprfx z0, z1 + merging cnt; 2p2: one zeroing cnt
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_nxv8bf16_ptrue(double %z0, <vscale x 8 x i16> %x, <vscale x 8 x bfloat> %y) { ; cnt.h on bfloat lanes (i16 result): passthru %x + all-true predicate; the double arg presumably occupies d0/z0 so %x/%y land in z1/z2
+; CHECK-LABEL: test_svcnt_nxv8bf16_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    cnt z0.h, p0/m, z2.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv8bf16_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.h
+; CHECK-2p2-NEXT:    cnt z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16> %x, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %y) ; passthru %x never appears in the expected asm; base: movprfx z0, z2 + merging cnt; 2p2: one zeroing cnt
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svcnt_nxv4f32_ptrue_u(double %z0, <vscale x 4 x float> %x) { ; cnt.s on float lanes (i32 result): poison passthru + all-true predicate; the double arg presumably occupies d0/z0 so %x lands in z1
+; CHECK-LABEL: test_svcnt_nxv4f32_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cnt z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv4f32_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.s
+; CHECK-2p2-NEXT:    cnt z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4f32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x) ; base: movprfx z0, z1 + merging cnt; 2p2: one zeroing cnt
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svcnt_nxv4f32_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 4 x float> %y) { ; cnt.s on float lanes (i32 result): passthru %x + all-true predicate; the double arg presumably occupies d0/z0 so %x/%y land in z1/z2
+; CHECK-LABEL: test_svcnt_nxv4f32_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    cnt z0.s, p0/m, z2.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv4f32_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.s
+; CHECK-2p2-NEXT:    cnt z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4f32(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 4 x float> %y) ; passthru %x never appears in the expected asm; base: movprfx z0, z2 + merging cnt; 2p2: one zeroing cnt
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svcnt_nxv2f64_ptrue_u(double %z0, <vscale x 2 x double> %x) { ; cnt.d on double lanes (i64 result): poison passthru + all-true predicate; the double arg presumably occupies d0/z0 so %x lands in z1
+; CHECK-LABEL: test_svcnt_nxv2f64_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cnt z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv2f64_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.d
+; CHECK-2p2-NEXT:    cnt z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2f64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x) ; base: movprfx z0, z1 + merging cnt; 2p2: one zeroing cnt
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svcnt_nxv2f64_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 2 x double> %y) { ; cnt.d on double lanes (i64 result): passthru %x + all-true predicate; the double arg presumably occupies d0/z0 so %x/%y land in z1/z2
+; CHECK-LABEL: test_svcnt_nxv2f64_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    cnt z0.d, p0/m, z2.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv2f64_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.d
+; CHECK-2p2-NEXT:    cnt z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2f64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 2 x double> %y) ; passthru %x never appears in the expected asm; base: movprfx z0, z2 + merging cnt; 2p2: one zeroing cnt
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 16 x i8> @test_svcnot_nxv16i8_ptrue_u(double %z0, <vscale x 16 x i8> %x) { ; cnot.b: poison passthru + all-true predicate; the double arg presumably occupies d0/z0 so %x lands in z1
+; CHECK-LABEL: test_svcnot_nxv16i8_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cnot z0.b, p0/m, z1.b
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnot_nxv16i8_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.b
+; CHECK-2p2-NEXT:    cnot z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnot.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x) ; base: movprfx z0, z1 + merging cnot; 2p2: one zeroing cnot
+  ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svcnot_nxv16i8_ptrue(double %z0, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) { ; cnot.b: passthru %x + all-true predicate; the double arg presumably occupies d0/z0 so %x/%y land in z1/z2
+; CHECK-LABEL: test_svcnot_nxv16i8_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    cnot z0.b, p0/m, z2.b
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnot_nxv16i8_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.b
+; CHECK-2p2-NEXT:    cnot z0.b, p0/z, z2.b
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnot.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %y) ; passthru %x never appears in the expected asm; base: movprfx z0, z2 + merging cnot; 2p2: one zeroing cnot
+  ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 8 x i16> @test_svcnot_nxv8i16_ptrue_u(double %z0, <vscale x 8 x i16> %x) { ; cnot.h: poison passthru + all-true predicate; the double arg presumably occupies d0/z0 so %x lands in z1
+; CHECK-LABEL: test_svcnot_nxv8i16_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cnot z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnot_nxv8i16_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.h
+; CHECK-2p2-NEXT:    cnot z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnot.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x) ; base: movprfx z0, z1 + merging cnot; 2p2: one zeroing cnot
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcnot_nxv8i16_ptrue(double %z0, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) { ; cnot.h: passthru %x + all-true predicate; the double arg presumably occupies d0/z0 so %x/%y land in z1/z2
+; CHECK-LABEL: test_svcnot_nxv8i16_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    cnot z0.h, p0/m, z2.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnot_nxv8i16_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.h
+; CHECK-2p2-NEXT:    cnot z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnot.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %y) ; passthru %x never appears in the expected asm; base: movprfx z0, z2 + merging cnot; 2p2: one zeroing cnot
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svcnot_nxv4i32_ptrue_u(double %z0, <vscale x 4 x i32> %x) { ; cnot.s: poison passthru + all-true predicate; the double arg presumably occupies d0/z0 so %x lands in z1
+; CHECK-LABEL: test_svcnot_nxv4i32_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cnot z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnot_nxv4i32_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.s
+; CHECK-2p2-NEXT:    cnot z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) ; base: movprfx z0, z1 + merging cnot; 2p2: one zeroing cnot
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svcnot_nxv4i32_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) { ; cnot.s: passthru %x + all-true predicate; the double arg presumably occupies d0/z0 so %x/%y land in z1/z2
+; CHECK-LABEL: test_svcnot_nxv4i32_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    cnot z0.s, p0/m, z2.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnot_nxv4i32_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.s
+; CHECK-2p2-NEXT:    cnot z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %y) ; passthru %x never appears in the expected asm; base: movprfx z0, z2 + merging cnot; 2p2: one zeroing cnot
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svcnot_nxv2i64_ptrue_u(double %z0, <vscale x 2 x i64> %x) { ; cnot.d: poison passthru + all-true predicate; the double arg presumably occupies d0/z0 so %x lands in z1
+; CHECK-LABEL: test_svcnot_nxv2i64_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    cnot z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnot_nxv2i64_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.d
+; CHECK-2p2-NEXT:    cnot z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnot.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) ; base: movprfx z0, z1 + merging cnot; 2p2: one zeroing cnot
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svcnot_nxv2i64_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 2 x i64> %y) { ; cnot.d: passthru %x + all-true predicate; the double arg presumably occupies d0/z0 so %x/%y land in z1/z2
+; CHECK-LABEL: test_svcnot_nxv2i64_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    cnot z0.d, p0/m, z2.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcnot_nxv2i64_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.d
+; CHECK-2p2-NEXT:    cnot z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnot.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y) ; passthru %x never appears in the expected asm; base: movprfx z0, z2 + merging cnot; 2p2: one zeroing cnot
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 16 x i8> @test_svnot_nxv16i8_ptrue_u(double %z0, <vscale x 16 x i8> %x) { ; not.b: poison passthru + all-true predicate; the double arg presumably occupies d0/z0 so %x lands in z1
+; CHECK-LABEL: test_svnot_nxv16i8_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    not z0.b, p0/m, z1.b
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svnot_nxv16i8_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.b
+; CHECK-2p2-NEXT:    not z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.not.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x) ; base: movprfx z0, z1 + merging not; 2p2: one zeroing not
+  ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svnot_nxv16i8_ptrue(double %z0, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) { ; not.b: passthru %x + all-true predicate; the double arg presumably occupies d0/z0 so %x/%y land in z1/z2
+; CHECK-LABEL: test_svnot_nxv16i8_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    not z0.b, p0/m, z2.b
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svnot_nxv16i8_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.b
+; CHECK-2p2-NEXT:    not z0.b, p0/z, z2.b
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.not.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %y) ; passthru %x never appears in the expected asm; base: movprfx z0, z2 + merging not; 2p2: one zeroing not
+  ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 8 x i16> @test_svnot_nxv8i16_ptrue_u(double %z0, <vscale x 8 x i16> %x) { ; not.h: poison passthru + all-true predicate; the double arg presumably occupies d0/z0 so %x lands in z1
+; CHECK-LABEL: test_svnot_nxv8i16_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    not z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svnot_nxv8i16_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.h
+; CHECK-2p2-NEXT:    not z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.not.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x) ; base: movprfx z0, z1 + merging not; 2p2: one zeroing not
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svnot_nxv8i16_ptrue(double %z0, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) { ; not.h: passthru %x + all-true predicate; the double arg presumably occupies d0/z0 so %x/%y land in z1/z2
+; CHECK-LABEL: test_svnot_nxv8i16_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    not z0.h, p0/m, z2.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svnot_nxv8i16_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.h
+; CHECK-2p2-NEXT:    not z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.not.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %y) ; passthru %x never appears in the expected asm; base: movprfx z0, z2 + merging not; 2p2: one zeroing not
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svnot_nxv4i32_ptrue_u(double %z0, <vscale x 4 x i32> %x) { ; not.s: poison passthru + all-true predicate; the double arg presumably occupies d0/z0 so %x lands in z1
+; CHECK-LABEL: test_svnot_nxv4i32_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    not z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svnot_nxv4i32_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.s
+; CHECK-2p2-NEXT:    not z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.not.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) ; base: movprfx z0, z1 + merging not; 2p2: one zeroing not
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svnot_nxv4i32_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) { ; not.s: passthru %x + all-true predicate; the double arg presumably occupies d0/z0 so %x/%y land in z1/z2
+; CHECK-LABEL: test_svnot_nxv4i32_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    not z0.s, p0/m, z2.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svnot_nxv4i32_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.s
+; CHECK-2p2-NEXT:    not z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.not.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %y) ; passthru %x never appears in the expected asm; base: movprfx z0, z2 + merging not; 2p2: one zeroing not
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svnot_nxv2i64_ptrue_u(double %z0, <vscale x 2 x i64> %x) { ; not.d: poison passthru + all-true predicate; the double arg presumably occupies d0/z0 so %x lands in z1
+; CHECK-LABEL: test_svnot_nxv2i64_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    not z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svnot_nxv2i64_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.d
+; CHECK-2p2-NEXT:    not z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.not.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) ; base: movprfx z0, z1 + merging not; 2p2: one zeroing not
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svnot_nxv2i64_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 2 x i64> %y) { ; not.d: passthru %x + all-true predicate; the double arg presumably occupies d0/z0 so %x/%y land in z1/z2
+; CHECK-LABEL: test_svnot_nxv2i64_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    not z0.d, p0/m, z2.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svnot_nxv2i64_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.d
+; CHECK-2p2-NEXT:    not z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) ; pattern 31 = all lanes (printed as a bare ptrue)
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.not.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y) ; passthru %x never appears in the expected asm; base: movprfx z0, z2 + merging not; 2p2: one zeroing not
+  ret <vscale x 2 x i64> %0
+}
diff --git a/llvm/test/CodeGen/AArch64/zeroing-forms-uscvtf.ll b/llvm/test/CodeGen/AArch64/zeroing-forms-uscvtf.ll
new file mode 100644
index 0000000000000..fd0126f3166dd
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/zeroing-forms-uscvtf.ll
@@ -0,0 +1,1183 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mattr=+sve    < %s | FileCheck %s
+; RUN: llc -mattr=+sve2p2 < %s | FileCheck %s -check-prefix CHECK-2p2
+
+; RUN: llc -mattr=+sme    -force-streaming < %s | FileCheck %s
+; RUN: llc -mattr=+sme2p2 -force-streaming < %s | FileCheck %s -check-prefix CHECK-2p2
+
+target triple = "aarch64-linux"
+
+define <vscale x 4 x float> @test_scvtf_f32_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) { ; scvtf i64 -> f32, poison passthru; %x arrives in z0, so source and result share a register
+; CHECK-LABEL: test_scvtf_f32_s64_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf z0.s, p0/m, z0.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f32_s64_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    scvtf z0.s, p0/z, z0.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.f32i64(<vscale x 4 x float> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) ; +sve/+sme: merging scvtf in place; sve2p2/sme2p2: zeroing scvtf
+  ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_scvtf_f32_s64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) { ; scvtf i64 -> f32, poison passthru; the double arg presumably occupies d0/z0 so %x lands in z1
+; CHECK-LABEL: test_scvtf_f32_s64_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf z0.s, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f32_s64_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    scvtf z0.s, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.f32i64(<vscale x 4 x float> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) ; +sve/+sme: merging scvtf into z0 with no movprfx; sve2p2/sme2p2: zeroing scvtf
+  ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_scvtf_f32_s64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) { ; scvtf i64 -> f32, zero passthru; the double arg presumably occupies d0/z0 so %x lands in z1
+; CHECK-LABEL: test_scvtf_f32_s64_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.s, #0 // =0x0
+; CHECK-NEXT:    scvtf z0.s, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f32_s64_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    scvtf z0.s, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.f32i64(<vscale x 4 x float> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) ; +sve/+sme: mov z0, #0 then merging scvtf; sve2p2/sme2p2: one zeroing scvtf
+  ret <vscale x 4 x float> %0
+}
+
+define <vscale x 2 x double> @test_scvtf_f64_s32_x_1(<vscale x 2 x i1> %pg, <vscale x 4 x i32> %x) { ; scvtf i32 -> f64, poison passthru; %x arrives in z0, so source and result share a register
+; CHECK-LABEL: test_scvtf_f64_s32_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf z0.d, p0/m, z0.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f64_s32_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    scvtf z0.d, p0/z, z0.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.f64i32(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %x) ; +sve/+sme: merging scvtf in place; sve2p2/sme2p2: zeroing scvtf
+  ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_scvtf_f64_s32_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 4 x i32> %x) { ; scvtf i32 -> f64, poison passthru; the double arg presumably occupies d0/z0 so %x lands in z1
+; CHECK-LABEL: test_scvtf_f64_s32_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf z0.d, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f64_s32_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    scvtf z0.d, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.f64i32(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %x) ; +sve/+sme: merging scvtf into z0 with no movprfx; sve2p2/sme2p2: zeroing scvtf
+  ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_scvtf_f64_s32_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 4 x i32> %x) { ; scvtf i32 -> f64, zero passthru; the double arg presumably occupies d0/z0 so %x lands in z1
+; CHECK-LABEL: test_scvtf_f64_s32_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.d, #0 // =0x0
+; CHECK-NEXT:    scvtf z0.d, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f64_s32_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    scvtf z0.d, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.f64i32(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %x) ; +sve/+sme: mov z0, #0 then merging scvtf; sve2p2/sme2p2: one zeroing scvtf
+  ret <vscale x 2 x double> %0
+}
+
+define <vscale x 8 x half> @test_scvtf_f16_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) { ; scvtf i32 -> f16, poison passthru; %x arrives in z0, so source and result share a register
+; CHECK-LABEL: test_scvtf_f16_s32_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf z0.h, p0/m, z0.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f16_s32_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    scvtf z0.h, p0/z, z0.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) ; +sve/+sme: merging scvtf in place; sve2p2/sme2p2: zeroing scvtf
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_scvtf_f16_s32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) { ; scvtf i32 -> f16, poison passthru; the double arg presumably occupies d0/z0 so %x lands in z1
+; CHECK-LABEL: test_scvtf_f16_s32_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf z0.h, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f16_s32_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    scvtf z0.h, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) ; +sve/+sme: merging scvtf into z0 with no movprfx; sve2p2/sme2p2: zeroing scvtf
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_scvtf_f16_s32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) { ; scvtf i32 -> f16, zero passthru; the double arg presumably occupies d0/z0 so %x lands in z1
+; CHECK-LABEL: test_scvtf_f16_s32_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.h, #0 // =0x0
+; CHECK-NEXT:    scvtf z0.h, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f16_s32_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    scvtf z0.h, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) ; +sve/+sme: mov z0, #0 then merging scvtf; sve2p2/sme2p2: one zeroing scvtf
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_scvtf_f16_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_scvtf_f16_s64_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf z0.h, p0/m, z0.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f16_s64_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    scvtf z0.h, p0/z, z0.d
+; CHECK-2p2-NEXT:    ret
+entry: ; Poison passthru, %x already in z0: CHECK converts in place with /m, CHECK-2p2 with /z.
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i64(<vscale x 8 x half> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_scvtf_f16_s64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_scvtf_f16_s64_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf z0.h, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f16_s64_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    scvtf z0.h, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry: ; Poison passthru; unused %z0 precedes %x (read from z1): CHECK is a lone /m convert, CHECK-2p2 a lone /z convert.
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i64(<vscale x 8 x half> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_scvtf_f16_s64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_scvtf_f16_s64_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.h, #0 // =0x0
+; CHECK-NEXT:    scvtf z0.h, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f16_s64_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    scvtf z0.h, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry: ; Zero passthru; %x read from z1: CHECK zeroes z0 then merges with /m, CHECK-2p2 is a single /z convert.
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i64(<vscale x 8 x half> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 4 x float> @test_ucvtf_f32_u64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_ucvtf_f32_u64_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f32_u64_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ucvtf z0.s, p0/z, z0.d
+; CHECK-2p2-NEXT:    ret
+entry: ; Poison passthru, %x already in z0: CHECK converts in place with /m, CHECK-2p2 with /z.
+  %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.f32i64(<vscale x 4 x float> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_ucvtf_f32_u64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_ucvtf_f32_u64_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ucvtf z0.s, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f32_u64_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ucvtf z0.s, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry: ; Poison passthru; unused %z0 precedes %x (read from z1): CHECK is a lone /m convert, CHECK-2p2 a lone /z convert.
+  %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.f32i64(<vscale x 4 x float> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_ucvtf_f32_u64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_ucvtf_f32_u64_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.s, #0 // =0x0
+; CHECK-NEXT:    ucvtf z0.s, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f32_u64_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ucvtf z0.s, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry: ; Zero passthru; %x read from z1: CHECK zeroes z0 then merges with /m, CHECK-2p2 is a single /z convert.
+  %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.f32i64(<vscale x 4 x float> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 4 x float> %0
+}
+
+define <vscale x 2 x double> @test_ucvtf_f64_u32_x_1(<vscale x 2 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_ucvtf_f64_u32_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f64_u32_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ucvtf z0.d, p0/z, z0.s
+; CHECK-2p2-NEXT:    ret
+entry: ; Poison passthru, %x already in z0: CHECK converts in place with /m, CHECK-2p2 with /z.
+  %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.f64i32(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_ucvtf_f64_u32_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_ucvtf_f64_u32_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ucvtf z0.d, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f64_u32_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ucvtf z0.d, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry: ; Poison passthru; unused %z0 precedes %x (read from z1): CHECK is a lone /m convert, CHECK-2p2 a lone /z convert.
+  %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.f64i32(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_ucvtf_f64_u32_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_ucvtf_f64_u32_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.d, #0 // =0x0
+; CHECK-NEXT:    ucvtf z0.d, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f64_u32_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ucvtf z0.d, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry: ; Zero passthru; %x read from z1: CHECK zeroes z0 then merges with /m, CHECK-2p2 is a single /z convert.
+  %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.f64i32(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 2 x double> %0
+}
+
+define <vscale x 8 x half> @test_ucvtf_f16_u32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_ucvtf_f16_u32_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f16_u32_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ucvtf z0.h, p0/z, z0.s
+; CHECK-2p2-NEXT:    ret
+entry: ; Poison passthru, %x already in z0: CHECK converts in place with /m, CHECK-2p2 with /z.
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_ucvtf_f16_u32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_ucvtf_f16_u32_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f16_u32_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ucvtf z0.h, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry: ; Poison passthru; unused %z0 precedes %x (read from z1): CHECK is a lone /m convert, CHECK-2p2 a lone /z convert.
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_ucvtf_f16_u32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_ucvtf_f16_u32_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.h, #0 // =0x0
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f16_u32_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ucvtf z0.h, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry: ; Zero passthru; %x read from z1: CHECK zeroes z0 then merges with /m, CHECK-2p2 is a single /z convert.
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_ucvtf_f16_u64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_ucvtf_f16_u64_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f16_u64_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ucvtf z0.h, p0/z, z0.d
+; CHECK-2p2-NEXT:    ret
+entry: ; Poison passthru, %x already in z0: CHECK converts in place with /m, CHECK-2p2 with /z.
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i64(<vscale x 8 x half> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_ucvtf_f16_u64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_ucvtf_f16_u64_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f16_u64_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ucvtf z0.h, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry: ; Poison passthru; unused %z0 precedes %x (read from z1): CHECK is a lone /m convert, CHECK-2p2 a lone /z convert.
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i64(<vscale x 8 x half> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_ucvtf_f16_u64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_ucvtf_f16_u64_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.h, #0 // =0x0
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f16_u64_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ucvtf z0.h, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry: ; Zero passthru; %x read from z1: CHECK zeroes z0 then merges with /m, CHECK-2p2 is a single /z convert.
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i64(<vscale x 8 x half> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_svcvt_f16_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcvt_f16_s16_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f16_s16_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    scvtf z0.h, p0/z, z0.h
+; CHECK-2p2-NEXT:    ret
+entry: ; Poison passthru, %x already in z0: CHECK converts in place with /m, CHECK-2p2 with /z.
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_svcvt_f16_s16_x_2(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcvt_f16_s16_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    scvtf z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f16_s16_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    scvtf z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry: ; Poison passthru; unused %z0 precedes %x (read from z1): CHECK is movprfx + /m convert, CHECK-2p2 a lone /z convert.
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_svcvt_f16_s16_z(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcvt_f16_s16_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.h, #0 // =0x0
+; CHECK-NEXT:    scvtf z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f16_s16_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    scvtf z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry: ; Zero passthru; %x read from z1: CHECK zeroes z0 then merges with /m, CHECK-2p2 is a single /z convert.
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_svcvt_f16_u16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcvt_f16_u16_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f16_u16_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ucvtf z0.h, p0/z, z0.h
+; CHECK-2p2-NEXT:    ret
+entry: ; Poison passthru, %x already in z0: CHECK converts in place with /m, CHECK-2p2 with /z.
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(<vscale x 8 x half> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_svcvt_f16_u16_x_2(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcvt_f16_u16_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f16_u16_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ucvtf z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry: ; Poison passthru; unused %z0 precedes %x (read from z1): CHECK is movprfx + /m convert, CHECK-2p2 a lone /z convert.
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(<vscale x 8 x half> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_svcvt_f16_u16_z(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcvt_f16_u16_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.h, #0 // =0x0
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f16_u16_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ucvtf z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry: ; Zero passthru; %x read from z1: CHECK zeroes z0 then merges with /m, CHECK-2p2 is a single /z convert.
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 4 x float> @test_svcvt_f32_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcvt_f32_s32_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf z0.s, p0/m, z0.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f32_s32_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    scvtf z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT:    ret
+entry: ; Poison passthru, %x already in z0: CHECK converts in place with /m, CHECK-2p2 with /z.
+  %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svcvt_f32_s32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcvt_f32_s32_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    scvtf z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f32_s32_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    scvtf z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry: ; Poison passthru; unused %z0 precedes %x (read from z1): CHECK is movprfx + /m convert, CHECK-2p2 a lone /z convert.
+  %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svcvt_f32_s32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcvt_f32_s32_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.s, #0 // =0x0
+; CHECK-NEXT:    scvtf z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f32_s32_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    scvtf z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry: ; Zero passthru; %x read from z1: CHECK zeroes z0 then merges with /m, CHECK-2p2 is a single /z convert.
+  %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svcvt_f32_u32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcvt_f32_u32_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f32_u32_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ucvtf z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT:    ret
+entry: ; Poison passthru, %x already in z0: CHECK converts in place with /m, CHECK-2p2 with /z.
+  %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.nxv4f32.nxv4i32(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svcvt_f32_u32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcvt_f32_u32_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    ucvtf z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f32_u32_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ucvtf z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry: ; Poison passthru; unused %z0 precedes %x (read from z1): CHECK is movprfx + /m convert, CHECK-2p2 a lone /z convert.
+  %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.nxv4f32.nxv4i32(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svcvt_f32_u32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcvt_f32_u32_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.s, #0 // =0x0
+; CHECK-NEXT:    ucvtf z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f32_u32_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ucvtf z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry: ; Zero passthru; %x read from z1: CHECK zeroes z0 then merges with /m, CHECK-2p2 is a single /z convert.
+  %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.nxv4f32.nxv4i32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 4 x float> %0
+}
+
+define <vscale x 2 x double> @test_svcvt_f64_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcvt_f64_s64_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f64_s64_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    scvtf z0.d, p0/z, z0.d
+; CHECK-2p2-NEXT:    ret
+entry: ; Poison passthru, %x already in z0: CHECK converts in place with /m, CHECK-2p2 with /z.
+  %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.nxv2f64.nxv2i64(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svcvt_f64_s64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcvt_f64_s64_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    scvtf z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f64_s64_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    scvtf z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry: ; Poison passthru; unused %z0 precedes %x (read from z1): CHECK is movprfx + /m convert, CHECK-2p2 a lone /z convert.
+  %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.nxv2f64.nxv2i64(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svcvt_f64_s64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcvt_f64_s64_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.d, #0 // =0x0
+; CHECK-NEXT:    scvtf z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f64_s64_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    scvtf z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry: ; Zero passthru; %x read from z1: CHECK zeroes z0 then merges with /m, CHECK-2p2 is a single /z convert.
+  %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.nxv2f64.nxv2i64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svcvt_f64_u64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcvt_f64_u64_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f64_u64_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ucvtf z0.d, p0/z, z0.d
+; CHECK-2p2-NEXT:    ret
+entry: ; Poison passthru, %x already in z0: CHECK converts in place with /m, CHECK-2p2 with /z.
+  %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.nxv2f64.nxv2i64(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svcvt_f64_u64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcvt_f64_u64_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    ucvtf z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f64_u64_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ucvtf z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry: ; Poison passthru; unused %z0 precedes %x (read from z1): CHECK is movprfx + /m convert, CHECK-2p2 a lone /z convert.
+  %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.nxv2f64.nxv2i64(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svcvt_f64_u64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcvt_f64_u64_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.d, #0 // =0x0
+; CHECK-NEXT:    ucvtf z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f64_u64_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ucvtf z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry: ; Zero passthru; %x read from z1: CHECK zeroes z0 then merges with /m, CHECK-2p2 is a single /z convert.
+  %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.nxv2f64.nxv2i64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 2 x double> %0
+}
+
+define <vscale x 4 x float> @test_scvtf_f32_i64_ptrue_u(double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_scvtf_f32_i64_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    scvtf z0.s, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f32_i64_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.d
+; CHECK-2p2-NEXT:    scvtf z0.s, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry: ; ptrue(i32 31) predicate, poison passthru: CHECK is ptrue + /m convert, CHECK-2p2 is ptrue + /z convert.
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.f32i64(<vscale x 4 x float> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_scvtf_f32_i64_ptrue(double %z0,<vscale x 4 x float> %x, <vscale x 2 x i64> %y) {
+; CHECK-LABEL: test_scvtf_f32_i64_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    scvtf z0.s, p0/m, z2.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f32_i64_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.d
+; CHECK-2p2-NEXT:    scvtf z0.s, p0/z, z2.d
+; CHECK-2p2-NEXT:    ret
+entry: ; ptrue(i32 31) predicate, live passthru %x (z1): CHECK copies z1 to z0 before the /m convert, CHECK-2p2 has no copy and uses /z.
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.f32i64(<vscale x 4 x float> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y)
+  ret <vscale x 4 x float> %0
+}
+
+define <vscale x 2 x double> @test_scvtf_f64_i32_ptrue_u(double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_scvtf_f64_i32_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    scvtf z0.d, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f64_i32_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.d
+; CHECK-2p2-NEXT:    scvtf z0.d, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry: ; ptrue(i32 31) predicate, poison passthru: CHECK is ptrue + /m convert, CHECK-2p2 is ptrue + /z convert.
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.f64i32(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_scvtf_f64_i32_ptrue(double %z0,<vscale x 2 x double> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: test_scvtf_f64_i32_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    scvtf z0.d, p0/m, z2.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f64_i32_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.d
+; CHECK-2p2-NEXT:    scvtf z0.d, p0/z, z2.s
+; CHECK-2p2-NEXT:    ret
+entry: ; ptrue(i32 31) predicate, live passthru %x (z1): CHECK copies z1 to z0 before the /m convert, CHECK-2p2 has no copy and uses /z.
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.f64i32(<vscale x 2 x double> %x, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %y)
+  ret <vscale x 2 x double> %0
+}
+
+define <vscale x 8 x half> @test_scvtf_f16_i32_ptrue_u(double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_scvtf_f16_i32_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    scvtf z0.h, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f16_i32_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.s
+; CHECK-2p2-NEXT:    scvtf z0.h, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry: ; ptrue(i32 31) predicate, poison passthru: CHECK is ptrue + /m convert, CHECK-2p2 is ptrue + /z convert.
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_scvtf_f16_i32_ptrue(double %z0,<vscale x 8 x half> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: test_scvtf_f16_i32_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    scvtf z0.h, p0/m, z2.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f16_i32_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.s
+; CHECK-2p2-NEXT:    scvtf z0.h, p0/z, z2.s
+; CHECK-2p2-NEXT:    ret
+entry: ; ptrue(i32 31) predicate, live passthru %x (z1): CHECK copies z1 to z0 before the /m convert, CHECK-2p2 has no copy and uses /z.
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> %x, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %y)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_scvtf_f16_i64_ptrue_u(double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_scvtf_f16_i64_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    scvtf z0.h, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f16_i64_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.d
+; CHECK-2p2-NEXT:    scvtf z0.h, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry: ; ptrue(i32 31) predicate, poison passthru: CHECK is ptrue + /m convert, CHECK-2p2 is ptrue + /z convert.
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i64(<vscale x 8 x half> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_scvtf_f16_i64_ptrue(double %z0,<vscale x 8 x half> %x, <vscale x 2 x i64> %y) {
+; CHECK-LABEL: test_scvtf_f16_i64_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    scvtf z0.h, p0/m, z2.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f16_i64_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.d
+; CHECK-2p2-NEXT:    scvtf z0.h, p0/z, z2.d
+; CHECK-2p2-NEXT:    ret
+entry: ; ptrue(i32 31) predicate, live passthru %x (z1): CHECK copies z1 to z0 before the /m convert, CHECK-2p2 has no copy and uses /z.
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i64(<vscale x 8 x half> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 4 x float> @test_ucvtf_f32_i64_ptrue_u(double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_ucvtf_f32_i64_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ucvtf z0.s, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f32_i64_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.d
+; CHECK-2p2-NEXT:    ucvtf z0.s, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry: ; ptrue(i32 31) predicate, poison passthru: CHECK is ptrue + /m convert, CHECK-2p2 is ptrue + /z convert.
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.f32i64(<vscale x 4 x float> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_ucvtf_f32_i64_ptrue(double %z0,<vscale x 4 x float> %x, <vscale x 2 x i64> %y) {
+; CHECK-LABEL: test_ucvtf_f32_i64_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ucvtf z0.s, p0/m, z2.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f32_i64_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.d
+; CHECK-2p2-NEXT:    ucvtf z0.s, p0/z, z2.d
+; CHECK-2p2-NEXT:    ret
+entry: ; ptrue(i32 31) predicate, live passthru %x (z1): CHECK copies z1 to z0 before the /m convert, CHECK-2p2 has no copy and uses /z.
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.f32i64(<vscale x 4 x float> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y)
+  ret <vscale x 4 x float> %0
+}
+
+define <vscale x 2 x double> @test_ucvtf_f64_i32_ptrue_u(double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_ucvtf_f64_i32_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ucvtf z0.d, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f64_i32_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.d
+; CHECK-2p2-NEXT:    ucvtf z0.d, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry: ; ptrue(i32 31) predicate, poison passthru: CHECK is ptrue + /m convert, CHECK-2p2 is ptrue + /z convert.
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.f64i32(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_ucvtf_f64_i32_ptrue(double %z0,<vscale x 2 x double> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: test_ucvtf_f64_i32_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ucvtf z0.d, p0/m, z2.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f64_i32_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.d
+; CHECK-2p2-NEXT:    ucvtf z0.d, p0/z, z2.s
+; CHECK-2p2-NEXT:    ret
+entry: ; ptrue(i32 31) predicate, live passthru %x (z1): CHECK copies z1 to z0 before the /m convert, CHECK-2p2 has no copy and uses /z.
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.f64i32(<vscale x 2 x double> %x, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %y)
+  ret <vscale x 2 x double> %0
+}
+
+define <vscale x 8 x half> @test_ucvtf_f16_i32_ptrue_u(double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_ucvtf_f16_i32_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f16_i32_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.s
+; CHECK-2p2-NEXT:    ucvtf z0.h, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry: ; ptrue(i32 31) predicate, poison passthru: CHECK is ptrue + /m convert, CHECK-2p2 is ptrue + /z convert.
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_ucvtf_f16_i32_ptrue(double %z0,<vscale x 8 x half> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: test_ucvtf_f16_i32_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z2.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f16_i32_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.s
+; CHECK-2p2-NEXT:    ucvtf z0.h, p0/z, z2.s
+; CHECK-2p2-NEXT:    ret
+entry: ; ptrue(i32 31) predicate, live passthru %x (z1): CHECK copies z1 to z0 before the /m convert, CHECK-2p2 has no copy and uses /z.
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> %x, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %y)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_ucvtf_f16_i64_ptrue_u(double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_ucvtf_f16_i64_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f16_i64_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.d
+; CHECK-2p2-NEXT:    ucvtf z0.h, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry: ; ptrue(i32 31) predicate, poison passthru: CHECK is ptrue + /m convert, CHECK-2p2 is ptrue + /z convert.
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i64(<vscale x 8 x half> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_ucvtf_f16_i64_ptrue(double %z0,<vscale x 8 x half> %x, <vscale x 2 x i64> %y) {
+; CHECK-LABEL: test_ucvtf_f16_i64_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z2.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f16_i64_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.d
+; CHECK-2p2-NEXT:    ucvtf z0.h, p0/z, z2.d
+; CHECK-2p2-NEXT:    ret
+entry: ; ptrue(i32 31) predicate, live passthru %x (z1): CHECK copies z1 to z0 before the /m convert, CHECK-2p2 has no copy and uses /z.
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i64(<vscale x 8 x half> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_scvtf_nxv8f16_nxv8i16_ptrue_u(double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_scvtf_nxv8f16_nxv8i16_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    scvtf z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_scvtf_nxv8f16_nxv8i16_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.h
+; CHECK-2p2-NEXT:    scvtf z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_scvtf_nxv8f16_nxv8i16_ptrue(double %z0,<vscale x 8 x half> %x, <vscale x 8 x i16> %y) {
+; CHECK-LABEL: test_scvtf_nxv8f16_nxv8i16_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    scvtf z0.h, p0/m, z2.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_scvtf_nxv8f16_nxv8i16_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.h
+; CHECK-2p2-NEXT:    scvtf z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half> %x, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %y)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_ucvtf_nxv8f16_nxv8i16_ptrue_u(double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_ucvtf_nxv8f16_nxv8i16_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_nxv8f16_nxv8i16_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.h
+; CHECK-2p2-NEXT:    ucvtf z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(<vscale x 8 x half> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_ucvtf_nxv8f16_nxv8i16_ptrue(double %z0,<vscale x 8 x half> %x, <vscale x 8 x i16> %y) {
+; CHECK-LABEL: test_ucvtf_nxv8f16_nxv8i16_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z2.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_nxv8f16_nxv8i16_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.h
+; CHECK-2p2-NEXT:    ucvtf z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(<vscale x 8 x half> %x, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %y)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 4 x float> @test_scvtf_nxv4f32_nxv4i32_ptrue_u(double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_scvtf_nxv4f32_nxv4i32_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    scvtf z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_scvtf_nxv4f32_nxv4i32_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.s
+; CHECK-2p2-NEXT:    scvtf z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_scvtf_nxv4f32_nxv4i32_ptrue(double %z0,<vscale x 4 x float> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: test_scvtf_nxv4f32_nxv4i32_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    scvtf z0.s, p0/m, z2.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_scvtf_nxv4f32_nxv4i32_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.s
+; CHECK-2p2-NEXT:    scvtf z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32(<vscale x 4 x float> %x, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %y)
+  ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_ucvtf_nxv4f32_nxv4i32_ptrue_u(double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_ucvtf_nxv4f32_nxv4i32_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    ucvtf z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_nxv4f32_nxv4i32_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.s
+; CHECK-2p2-NEXT:    ucvtf z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.nxv4f32.nxv4i32(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+  ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_ucvtf_nxv4f32_nxv4i32_ptrue(double %z0,<vscale x 4 x float> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: test_ucvtf_nxv4f32_nxv4i32_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    ucvtf z0.s, p0/m, z2.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_nxv4f32_nxv4i32_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.s
+; CHECK-2p2-NEXT:    ucvtf z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.nxv4f32.nxv4i32(<vscale x 4 x float> %x, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %y)
+  ret <vscale x 4 x float> %0
+}
+
+define <vscale x 2 x double> @test_scvtf_nxv2f64_nxv2i64_ptrue_u(double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_scvtf_nxv2f64_nxv2i64_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    scvtf z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_scvtf_nxv2f64_nxv2i64_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.d
+; CHECK-2p2-NEXT:    scvtf z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.nxv2f64.nxv2i64(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_scvtf_nxv2f64_nxv2i64_ptrue(double %z0,<vscale x 2 x double> %x, <vscale x 2 x i64> %y) {
+; CHECK-LABEL: test_scvtf_nxv2f64_nxv2i64_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    scvtf z0.d, p0/m, z2.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_scvtf_nxv2f64_nxv2i64_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.d
+; CHECK-2p2-NEXT:    scvtf z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.nxv2f64.nxv2i64(<vscale x 2 x double> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y)
+  ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_ucvtf_nxv2f64_nxv2i64_ptrue_u(double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_ucvtf_nxv2f64_nxv2i64_ptrue_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    ucvtf z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_nxv2f64_nxv2i64_ptrue_u:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.d
+; CHECK-2p2-NEXT:    ucvtf z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.nxv2f64.nxv2i64(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+  ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_ucvtf_nxv2f64_nxv2i64_ptrue(double %z0,<vscale x 2 x double> %x, <vscale x 2 x i64> %y) {
+; CHECK-LABEL: test_ucvtf_nxv2f64_nxv2i64_ptrue:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    ucvtf z0.d, p0/m, z2.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_nxv2f64_nxv2i64_ptrue:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    ptrue p0.d
+; CHECK-2p2-NEXT:    ucvtf z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.nxv2f64.nxv2i64(<vscale x 2 x double> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y)
+  ret <vscale x 2 x double> %0
+}
+
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shifts.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shifts.mir
new file mode 100644
index 0000000000000..fd794bd7d9cf9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shifts.mir
@@ -0,0 +1,208 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -run-pass=amdgpu-prelegalizer-combiner %s -o - | FileCheck %s
+
+---
+name:            combine_ashr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr31
+
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: combine_ashr
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr31, $vgpr0, $vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: G_STORE [[C]](s32), [[MV]](p0) :: (store (s32))
+    ; CHECK-NEXT: SI_RETURN
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(p0) = G_MERGE_VALUES %0(s32), %1(s32)
+    %3:_(s32) = G_CONSTANT i32 10
+    %4:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x)
+    %5:_(s32) = G_ASHR %4, %3(s32)
+    G_STORE %5(s32), %2(p0) :: (store (s32))
+    SI_RETURN
+
+...
+---
+name:            combine_lshr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr31
+
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: combine_lshr
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr31, $vgpr0, $vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: G_STORE [[C]](s32), [[MV]](p0) :: (store (s32))
+    ; CHECK-NEXT: SI_RETURN
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(p0) = G_MERGE_VALUES %0(s32), %1(s32)
+    %3:_(s32) = G_CONSTANT i32 10
+    %4:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x)
+    %5:_(s32) = G_LSHR %4, %3(s32)
+    G_STORE %5(s32), %2(p0) :: (store (s32))
+    SI_RETURN
+
+...
+---
+name:            combine_shl
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr31
+
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: combine_shl
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr31, $vgpr0, $vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: G_STORE [[C]](s32), [[MV]](p0) :: (store (s32))
+    ; CHECK-NEXT: SI_RETURN
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(p0) = G_MERGE_VALUES %0(s32), %1(s32)
+    %3:_(s32) = G_CONSTANT i32 16
+    %4:_(s32) = G_CONSTANT i32 4294901760
+    %5:_(s32) = G_SHL %4, %3(s32)
+    G_STORE %5(s32), %2(p0) :: (store (s32))
+    SI_RETURN
+
+...
+---
+name:            combine_ashr2
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr31
+
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: combine_ashr2
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr31, $vgpr0, $vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 -1
+    ; CHECK-NEXT: G_STORE [[C]](s8), [[MV]](p0) :: (store (s8))
+    ; CHECK-NEXT: SI_RETURN
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(p0) = G_MERGE_VALUES %0(s32), %1(s32)
+    %3:_(s32) = G_CONSTANT i32 1
+    %4:_(s8) = G_CONSTANT i8 -2
+    %5:_(s8) = G_ASHR %4, %3(s32)
+    G_STORE %5(s8), %2(p0) :: (store (s8))
+    SI_RETURN
+
+...
+---
+name:            combine_vector_lshr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr31
+
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: combine_vector_lshr
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr31, $vgpr0, $vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[C]](s32)
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+    %0:_(<2 x s32>) = G_IMPLICIT_DEF
+    %1:_(s32) = G_CONSTANT i32 511
+    %2:_(s32) = G_CONSTANT i32 0
+    %3:_(s32) = G_CONSTANT i32 1
+    %4:_(s32) = G_CONSTANT i32 9
+    %5:_(<2 x s32>) = G_BUILD_VECTOR %4(s32), %4(s32)
+    %6:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1(s32), %2(s32)
+    %7:_(<2 x s32>) = G_INSERT_VECTOR_ELT %6, %1(s32), %3(s32)
+    %8:_(<2 x s32>) = G_LSHR %7, %5(<2 x s32>)
+    %9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %8(<2 x s32>)
+    $vgpr0 = COPY %9(s32)
+    $vgpr1 = COPY %10(s32)
+    SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+...
+---
+name:            combine_vector_shl
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr31
+
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: combine_vector_shl
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr31, $vgpr0, $vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[C]](s32)
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+    %0:_(<2 x s32>) = G_IMPLICIT_DEF
+    %1:_(s32) = G_CONSTANT i32 4294901760
+    %2:_(s32) = G_CONSTANT i32 0
+    %3:_(s32) = G_CONSTANT i32 1
+    %4:_(s32) = G_CONSTANT i32 16
+    %5:_(<2 x s32>) = G_BUILD_VECTOR %4(s32), %4(s32)
+    %6:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1(s32), %2(s32)
+    %7:_(<2 x s32>) = G_INSERT_VECTOR_ELT %6, %1(s32), %3(s32)
+    %8:_(<2 x s32>) = G_SHL %7, %5(<2 x s32>)
+    %9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %8(<2 x s32>)
+    $vgpr0 = COPY %9(s32)
+    $vgpr1 = COPY %10(s32)
+    SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+...
+---
+name:            combine_vector_ashr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr31
+
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: combine_vector_ashr
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr31, $vgpr0, $vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[C]](s32)
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+    %0:_(<2 x s32>) = G_IMPLICIT_DEF
+    %1:_(s32) = G_CONSTANT i32 -1
+    %2:_(s32) = G_CONSTANT i32 0
+    %3:_(s32) = G_CONSTANT i32 1
+    %4:_(s32) = G_CONSTANT i32 1
+    %5:_(<2 x s32>) = G_BUILD_VECTOR %4(s32), %4(s32)
+    %6:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1(s32), %2(s32)
+    %7:_(<2 x s32>) = G_INSERT_VECTOR_ELT %6, %1(s32), %3(s32)
+    %8:_(<2 x s32>) = G_ASHR %7, %5(<2 x s32>)
+    %9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %8(<2 x s32>)
+    $vgpr0 = COPY %9(s32)
+    $vgpr1 = COPY %10(s32)
+    SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir
index 6ae8895322d6f..a8cd974b01ab4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir
@@ -374,23 +374,15 @@ body:             |
     ; GFX6-LABEL: name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16
     ; GFX6: liveins: $vgpr0
     ; GFX6-NEXT: {{  $}}
-    ; GFX6-NEXT: %zero:_(s16) = G_CONSTANT i16 0
-    ; GFX6-NEXT: %zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero(s16), %zero(s16)
-    ; GFX6-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
-    ; GFX6-NEXT: %shiftamtvector:_(<2 x s16>) = G_BUILD_VECTOR %shiftamt(s16), %shiftamt(s16)
-    ; GFX6-NEXT: %extend:_(<2 x s32>) = G_ZEXT %zerovector(<2 x s16>)
-    ; GFX6-NEXT: %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector(<2 x s16>)
+    ; GFX6-NEXT: %6:_(s32) = G_CONSTANT i32 0
+    ; GFX6-NEXT: %shl:_(<2 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32)
     ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>)
     ;
     ; GFX9-LABEL: name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16
     ; GFX9: liveins: $vgpr0
     ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: %zero:_(s16) = G_CONSTANT i16 0
-    ; GFX9-NEXT: %zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero(s16), %zero(s16)
-    ; GFX9-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
-    ; GFX9-NEXT: %shiftamtvector:_(<2 x s16>) = G_BUILD_VECTOR %shiftamt(s16), %shiftamt(s16)
-    ; GFX9-NEXT: %extend:_(<2 x s32>) = G_ZEXT %zerovector(<2 x s16>)
-    ; GFX9-NEXT: %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector(<2 x s16>)
+    ; GFX9-NEXT: %6:_(s32) = G_CONSTANT i32 0
+    ; GFX9-NEXT: %shl:_(<2 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32)
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>)
     %zero:_(s16) = G_CONSTANT i16 0
     %zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero, %zero:_(s16)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir
index 6ceb41199af6d..3780542cd8799 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir
@@ -246,23 +246,15 @@ body:             |
     ; GFX6-LABEL: name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16
     ; GFX6: liveins: $vgpr0, $vgpr1
     ; GFX6-NEXT: {{  $}}
-    ; GFX6-NEXT: %zero:_(s16) = G_CONSTANT i16 0
-    ; GFX6-NEXT: %zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero(s16), %zero(s16)
-    ; GFX6-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
-    ; GFX6-NEXT: %shiftamtvector:_(<2 x s16>) = G_BUILD_VECTOR %shiftamt(s16), %shiftamt(s16)
-    ; GFX6-NEXT: %extend:_(<2 x s32>) = G_ZEXT %zerovector(<2 x s16>)
-    ; GFX6-NEXT: %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector(<2 x s16>)
+    ; GFX6-NEXT: %6:_(s32) = G_CONSTANT i32 0
+    ; GFX6-NEXT: %shl:_(<2 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32)
     ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>)
     ;
     ; GFX9-LABEL: name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: %zero:_(s16) = G_CONSTANT i16 0
-    ; GFX9-NEXT: %zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero(s16), %zero(s16)
-    ; GFX9-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
-    ; GFX9-NEXT: %shiftamtvector:_(<2 x s16>) = G_BUILD_VECTOR %shiftamt(s16), %shiftamt(s16)
-    ; GFX9-NEXT: %extend:_(<2 x s32>) = G_ZEXT %zerovector(<2 x s16>)
-    ; GFX9-NEXT: %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector(<2 x s16>)
+    ; GFX9-NEXT: %6:_(s32) = G_CONSTANT i32 0
+    ; GFX9-NEXT: %shl:_(<2 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32)
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>)
     %zero:_(s16) = G_CONSTANT i16 0
     %zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero, %zero:_(s16)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
index b9cd330ee2b5f..4ddbb0afd7fc5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
@@ -1434,13 +1434,11 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
 ; SI-LABEL: v_test_sitofp_i64_byte_to_f32:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    v_ffbh_i32_e32 v2, 0
+; SI-NEXT:    v_add_i32_e32 v2, vcc, -1, v2
 ; SI-NEXT:    v_and_b32_e32 v0, 0xff, v0
-; SI-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
-; SI-NEXT:    v_ffbh_i32_e32 v3, 0
-; SI-NEXT:    v_add_i32_e32 v2, vcc, 32, v2
-; SI-NEXT:    v_add_i32_e32 v3, vcc, -1, v3
 ; SI-NEXT:    v_mov_b32_e32 v1, 0
-; SI-NEXT:    v_min_u32_e32 v2, v3, v2
+; SI-NEXT:    v_min_u32_e32 v2, 32, v2
 ; SI-NEXT:    v_lshl_b64 v[0:1], v[0:1], v2
 ; SI-NEXT:    v_min_u32_e32 v0, 1, v0
 ; SI-NEXT:    v_or_b32_e32 v0, v1, v0
@@ -1452,13 +1450,11 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
 ; VI-LABEL: v_test_sitofp_i64_byte_to_f32:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_ffbh_i32_e32 v2, 0
+; VI-NEXT:    v_add_u32_e32 v2, vcc, -1, v2
 ; VI-NEXT:    v_and_b32_e32 v0, 0xff, v0
-; VI-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
-; VI-NEXT:    v_ffbh_i32_e32 v3, 0
-; VI-NEXT:    v_add_u32_e32 v2, vcc, 32, v2
-; VI-NEXT:    v_add_u32_e32 v3, vcc, -1, v3
 ; VI-NEXT:    v_mov_b32_e32 v1, 0
-; VI-NEXT:    v_min_u32_e32 v2, v3, v2
+; VI-NEXT:    v_min_u32_e32 v2, 32, v2
 ; VI-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
 ; VI-NEXT:    v_min_u32_e32 v0, 1, v0
 ; VI-NEXT:    v_or_b32_e32 v0, v1, v0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
index cc185aff9eff2..784611cf68dd2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
@@ -1800,9 +1800,9 @@ define amdgpu_ps i65 @s_lshr_i65_33(i65 inreg %value) {
 ; GCN-NEXT:    s_and_b64 s[2:3], s[2:3], 1
 ; GCN-NEXT:    s_lshr_b32 s0, s1, 1
 ; GCN-NEXT:    s_mov_b32 s1, 0
-; GCN-NEXT:    s_lshl_b64 s[4:5], s[2:3], 31
-; GCN-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
-; GCN-NEXT:    s_lshr_b32 s2, s3, 1
+; GCN-NEXT:    s_lshl_b64 s[2:3], s[2:3], 31
+; GCN-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
+; GCN-NEXT:    s_mov_b32 s2, 0
 ; GCN-NEXT:    ; return to shader part epilog
 ;
 ; GFX10PLUS-LABEL: s_lshr_i65_33:
@@ -1810,9 +1810,9 @@ define amdgpu_ps i65 @s_lshr_i65_33(i65 inreg %value) {
 ; GFX10PLUS-NEXT:    s_and_b64 s[2:3], s[2:3], 1
 ; GFX10PLUS-NEXT:    s_lshr_b32 s0, s1, 1
 ; GFX10PLUS-NEXT:    s_mov_b32 s1, 0
-; GFX10PLUS-NEXT:    s_lshl_b64 s[4:5], s[2:3], 31
-; GFX10PLUS-NEXT:    s_lshr_b32 s2, s3, 1
-; GFX10PLUS-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
+; GFX10PLUS-NEXT:    s_lshl_b64 s[2:3], s[2:3], 31
+; GFX10PLUS-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
+; GFX10PLUS-NEXT:    s_mov_b32 s2, 0
 ; GFX10PLUS-NEXT:    ; return to shader part epilog
   %result = lshr i65 %value, 33
   ret i65 %result
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
index 88eb0e4b848c9..2fa5492c8a2b7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
@@ -608,34 +608,25 @@ define i32 @v_sdiv_i32_24bit(i32 %num, i32 %den) {
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
 ; GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
-; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
-; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
-; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v1
-; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v1
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v4
-; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v4
-; GISEL-NEXT:    v_mul_lo_u32 v5, v4, v1
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v2, v1
+; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v2, v2
+; GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v2
+; GISEL-NEXT:    v_mul_hi_u32 v3, v2, v3
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
+; GISEL-NEXT:    v_mul_hi_u32 v2, v0, v2
+; GISEL-NEXT:    v_mul_lo_u32 v3, v2, v1
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
-; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], v0, v1
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v4
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v0, v1
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v2
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v1, v2, v3
-; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v1
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_sdiv_i32_24bit:
@@ -677,20 +668,6 @@ define <2 x i32> @v_sdiv_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
 ; GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
 ; GISEL-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
 ; GISEL-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
-; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
-; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
-; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
-; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
-; GISEL-NEXT:    v_xor_b32_e32 v8, v4, v5
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT:    v_xor_b32_e32 v9, v6, v7
-; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
-; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v5
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
-; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v2
 ; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v3
@@ -711,15 +688,15 @@ define <2 x i32> @v_sdiv_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
 ; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
 ; GISEL-NEXT:    v_mul_lo_u32 v6, v4, v2
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
-; GISEL-NEXT:    v_mul_lo_u32 v10, v5, v3
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, 1, v5
+; GISEL-NEXT:    v_mul_lo_u32 v8, v5, v3
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v5
 ; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v10
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v8
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
 ; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
 ; GISEL-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[4:5]
 ; GISEL-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
@@ -729,10 +706,6 @@ define <2 x i32> @v_sdiv_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
 ; GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v8
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v9
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v9
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_sdiv_v2i32_24bit:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
index 14b30e0d79946..80cda2e7f3c81 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
@@ -2530,254 +2530,227 @@ define <2 x i64> @v_sdiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v4
-; GISEL-NEXT:    v_add_i32_e64 v3, s[4:5], 0, 0
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, 0, v1
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v1
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v3
-; GISEL-NEXT:    v_sub_i32_e32 v11, vcc, 0, v1
-; GISEL-NEXT:    v_subb_u32_e32 v12, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v4
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
-; GISEL-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
-; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
-; GISEL-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
-; GISEL-NEXT:    v_trunc_f32_e32 v9, v7
-; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v9
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v10, v5
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v13, v9
-; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v11, v10, 0
-; GISEL-NEXT:    v_mov_b32_e32 v5, v8
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v11, v13, v[5:6]
-; GISEL-NEXT:    v_mul_lo_u32 v5, v13, v7
-; GISEL-NEXT:    v_mul_hi_u32 v14, v10, v7
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v10, v[8:9]
-; GISEL-NEXT:    v_mul_hi_u32 v7, v13, v7
-; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v8
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v14
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v14, v13, v8
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
-; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v8
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v14, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
-; GISEL-NEXT:    v_mul_hi_u32 v8, v13, v8
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v5
-; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, v13, v7, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v11, v10, 0
-; GISEL-NEXT:    v_mov_b32_e32 v5, v8
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v11, v13, v[5:6]
-; GISEL-NEXT:    v_mul_lo_u32 v5, v13, v7
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, 0, v0
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v10, v[8:9]
-; GISEL-NEXT:    v_mul_hi_u32 v0, v10, v7
-; GISEL-NEXT:    v_mul_hi_u32 v7, v13, v7
-; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v8
-; GISEL-NEXT:    v_and_b32_e32 v12, 0xffffff, v2
-; GISEL-NEXT:    v_and_b32_e32 v2, 0xffffff, v6
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v5, v13, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
-; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v8
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
-; GISEL-NEXT:    v_mul_hi_u32 v8, v13, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v10, v0
-; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v13, v5, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v7, v3, v0
-; GISEL-NEXT:    v_mul_lo_u32 v8, v11, v5
-; GISEL-NEXT:    v_mul_hi_u32 v9, v11, v0
-; GISEL-NEXT:    v_mul_hi_u32 v0, v3, v0
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v3, v1
+; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
+; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, 0, v1
+; GISEL-NEXT:    v_mac_f32_e32 v3, 0x4f800000, v5
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; GISEL-NEXT:    v_subb_u32_e64 v11, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_mul_f32_e32 v3, 0x5f7ffffc, v3
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v3
+; GISEL-NEXT:    v_trunc_f32_e32 v7, v4
+; GISEL-NEXT:    v_mac_f32_e32 v3, 0xcf800000, v7
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v9, v3
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v12, v7
+; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v10, v9, 0
+; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v10, v12, v[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v4, v12, v3
+; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v11, v9, v[7:8]
+; GISEL-NEXT:    v_mul_hi_u32 v8, v9, v3
+; GISEL-NEXT:    v_mul_hi_u32 v3, v12, v3
+; GISEL-NEXT:    v_mul_lo_u32 v13, v9, v7
+; GISEL-NEXT:    v_mul_lo_u32 v14, v12, v7
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_mul_hi_u32 v8, v9, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v14, v3
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v8
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v9, v3, v5
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
-; GISEL-NEXT:    v_mul_hi_u32 v8, v11, v5
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v13, v8
+; GISEL-NEXT:    v_mul_hi_u32 v7, v12, v7
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v3
+; GISEL-NEXT:    v_addc_u32_e32 v12, vcc, v12, v4, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v10, v9, 0
+; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v10, v12, v[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v4, v12, v3
+; GISEL-NEXT:    v_and_b32_e32 v10, 0xffffff, v0
+; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v11, v9, v[7:8]
+; GISEL-NEXT:    v_mul_hi_u32 v0, v9, v3
+; GISEL-NEXT:    v_mul_hi_u32 v3, v12, v3
+; GISEL-NEXT:    v_mul_lo_u32 v8, v9, v7
+; GISEL-NEXT:    v_and_b32_e32 v11, 0xffffff, v2
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v8
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v0, v7
-; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
-; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v1, v10, 0
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
-; GISEL-NEXT:    v_mov_b32_e32 v5, v8
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v1, v0, v[5:6]
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v3, v10, v[8:9]
-; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v11, v7
-; GISEL-NEXT:    v_subb_u32_e64 v7, s[4:5], v3, v5, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], v3, v5
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v2, s[4:5], 0, v2
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v11, v2
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v7, v3
-; GISEL-NEXT:    v_subb_u32_e32 v5, vcc, v5, v3, vcc
-; GISEL-NEXT:    v_mac_f32_e32 v11, 0x4f800000, v4
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, v8, v9, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, v6, v1
-; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
-; GISEL-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v5, vcc
-; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
-; GISEL-NEXT:    v_trunc_f32_e32 v5, v5
-; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v11, v4
-; GISEL-NEXT:    v_sub_i32_e32 v14, vcc, 0, v2
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v13, v5
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v14, v11, 0
-; GISEL-NEXT:    v_subb_u32_e32 v15, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v14, v13, v[5:6]
-; GISEL-NEXT:    v_add_i32_e32 v16, vcc, 1, v10
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v15, v11, v[5:6]
-; GISEL-NEXT:    v_addc_u32_e32 v17, vcc, 0, v0, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v9, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, -1, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v6, v13, v4
-; GISEL-NEXT:    v_mul_lo_u32 v8, v11, v5
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v9, v18, v1, vcc
-; GISEL-NEXT:    v_mul_hi_u32 v1, v11, v4
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; GISEL-NEXT:    v_mul_lo_u32 v4, v12, v7
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
+; GISEL-NEXT:    v_mul_hi_u32 v8, v9, v7
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v8
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v6, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v6, v13, v5
-; GISEL-NEXT:    v_mul_hi_u32 v4, v13, v4
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT:    v_mul_hi_u32 v8, v11, v5
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_mul_hi_u32 v7, v12, v7
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v7, v3
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
+; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, v12, v3, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v7, 0, v0
+; GISEL-NEXT:    v_mul_lo_u32 v8, v10, v4
+; GISEL-NEXT:    v_and_b32_e32 v3, 0xffffff, v6
+; GISEL-NEXT:    v_mul_hi_u32 v6, v10, v0
+; GISEL-NEXT:    v_mul_hi_u32 v0, 0, v0
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
+; GISEL-NEXT:    v_mul_lo_u32 v8, 0, v4
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; GISEL-NEXT:    v_mul_hi_u32 v7, v10, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
-; GISEL-NEXT:    v_mul_hi_u32 v5, v13, v5
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v4, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v1
-; GISEL-NEXT:    v_addc_u32_e32 v11, vcc, v13, v4, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v14, v8, 0
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, 1, v16
-; GISEL-NEXT:    v_mov_b32_e32 v1, v5
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v14, v11, v[1:2]
-; GISEL-NEXT:    v_addc_u32_e32 v18, vcc, 0, v17, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v15, v8, v[5:6]
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v16, v13, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v9, v17, v18, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
-; GISEL-NEXT:    v_mul_lo_u32 v6, v11, v4
-; GISEL-NEXT:    v_mul_lo_u32 v7, v8, v5
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v10, v1, vcc
-; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], 0, v12
-; GISEL-NEXT:    v_mul_hi_u32 v12, v8, v4
-; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v12
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
+; GISEL-NEXT:    v_mul_hi_u32 v4, 0, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v1, v0, 0
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v9, v3
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v1, v4, v[7:8]
+; GISEL-NEXT:    v_mac_f32_e32 v9, 0x4f800000, v5
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v2, v9
+; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], 0, v0, v[7:8]
+; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, v10, v6
+; GISEL-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v2
+; GISEL-NEXT:    v_subb_u32_e64 v9, s[4:5], 0, v7, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v10, s[4:5], 0, v7
+; GISEL-NEXT:    v_trunc_f32_e32 v7, v5
+; GISEL-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v7
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v12, v2
+; GISEL-NEXT:    v_sub_i32_e64 v13, s[4:5], 0, v3
+; GISEL-NEXT:    v_subb_u32_e64 v14, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v13, v12, 0
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v15, v7
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v1
+; GISEL-NEXT:    v_mov_b32_e32 v2, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v13, v15, v[2:3]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, -1, v16, s[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v14, v12, v[6:7]
+; GISEL-NEXT:    v_subbrev_u32_e32 v2, vcc, 0, v10, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v7, v15, v5
+; GISEL-NEXT:    v_mul_lo_u32 v10, v12, v6
+; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, v8, v1
+; GISEL-NEXT:    v_subbrev_u32_e32 v16, vcc, 0, v2, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v2, v12, v5
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
+; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v7, v15, v6
+; GISEL-NEXT:    v_mul_hi_u32 v5, v15, v5
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v10, v2
+; GISEL-NEXT:    v_mul_hi_u32 v10, v12, v6
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
+; GISEL-NEXT:    v_mul_hi_u32 v6, v15, v6
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v12, v2
+; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, v15, v5, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v13, v7, 0
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, 1, v0
+; GISEL-NEXT:    v_addc_u32_e32 v15, vcc, 0, v4, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v1
+; GISEL-NEXT:    v_mov_b32_e32 v1, v6
+; GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v13, v10, v[1:2]
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v16
+; GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v14, v7, v[1:2]
+; GISEL-NEXT:    v_cndmask_b32_e32 v6, -1, v8, vcc
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v12
+; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, 0, v15, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v13, v10, v5
+; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v1
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
+; GISEL-NEXT:    v_mul_hi_u32 v6, v7, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v12, v2, vcc
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v13, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v12, v6
 ; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
-; GISEL-NEXT:    v_mul_lo_u32 v12, v11, v5
-; GISEL-NEXT:    v_mul_hi_u32 v4, v11, v4
-; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
-; GISEL-NEXT:    v_mul_hi_u32 v7, v8, v5
-; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v12, v4
+; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v1
+; GISEL-NEXT:    v_mul_hi_u32 v5, v10, v5
+; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v13, v6
+; GISEL-NEXT:    v_mul_hi_u32 v13, v7, v1
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v12, v5
 ; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v12, v7
-; GISEL-NEXT:    v_mul_hi_u32 v5, v11, v5
-; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
+; GISEL-NEXT:    v_mul_hi_u32 v1, v10, v1
 ; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v6
-; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v8, v4
-; GISEL-NEXT:    v_addc_u32_e64 v5, s[4:5], v11, v5, s[4:5]
-; GISEL-NEXT:    v_mul_lo_u32 v6, v3, v4
-; GISEL-NEXT:    v_mul_lo_u32 v7, v10, v5
-; GISEL-NEXT:    v_cndmask_b32_e32 v8, v0, v9, vcc
-; GISEL-NEXT:    v_mul_hi_u32 v0, v10, v4
-; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v12, v6
+; GISEL-NEXT:    v_add_i32_e64 v1, s[4:5], v1, v6
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v7, v5
+; GISEL-NEXT:    v_addc_u32_e64 v1, s[4:5], v10, v1, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v6, 0, v5
+; GISEL-NEXT:    v_mul_lo_u32 v7, v11, v1
+; GISEL-NEXT:    v_mul_hi_u32 v10, v11, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v8, v15, v8, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v5, 0, v5
 ; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v6, v3, v5
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
-; GISEL-NEXT:    v_mul_hi_u32 v7, v10, v5
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; GISEL-NEXT:    v_mul_lo_u32 v7, 0, v1
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
+; GISEL-NEXT:    v_mul_hi_u32 v10, v11, v1
 ; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v4, v0
-; GISEL-NEXT:    v_mul_hi_u32 v9, v3, v5
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v2, v7, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v0
-; GISEL-NEXT:    v_mov_b32_e32 v0, v5
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v2, v9, v[0:1]
-; GISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0, v1
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v3, v7, v[5:6]
-; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v8, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v10, v4
-; GISEL-NEXT:    v_subb_u32_e64 v6, s[4:5], v3, v5, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], v3, v5
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v3
-; GISEL-NEXT:    v_subb_u32_e32 v5, vcc, v5, v3, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v2
-; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v4, v2
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v6, v3
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v5, v6
+; GISEL-NEXT:    v_mul_hi_u32 v1, 0, v1
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v3, v10, 0
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v1, v7
+; GISEL-NEXT:    v_mov_b32_e32 v1, v6
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v3, v12, v[1:2]
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], 0, v10, v[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v4, v8, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v11, v5
+; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], 0, v6
+; GISEL-NEXT:    v_subb_u32_e64 v4, s[4:5], 0, v6, vcc
+; GISEL-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v3
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v4
 ; GISEL-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, v8, v10, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v7
-; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, 0, v9, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v5, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v2
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, -1, v6, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v10
+; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, 0, v12, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v3
 ; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v11, v2, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v8
-; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, 0, v10, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, -1, v2, vcc
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v6
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v7, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v8, v3, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v10, v4, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v7, v2, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v9, v3, vcc
-; GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, 0, v2
-; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v6, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v7, v5, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v10, v2, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v12, v3, vcc
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_sdiv_v2i64_24bit:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
index 2b12e4b973acb..530f4cf53321e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
@@ -647,31 +647,23 @@ define i32 @v_srem_i32_24bit(i32 %num, i32 %den) {
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
 ; GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
-; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
-; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
-; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v3, v1
-; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, 0, v1
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v3, v3
-; GISEL-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v3
-; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; GISEL-NEXT:    v_mul_hi_u32 v3, v0, v3
-; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v1
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
-; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v2, v1
+; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v2, v2
+; GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v2
+; GISEL-NEXT:    v_mul_hi_u32 v3, v2, v3
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
+; GISEL-NEXT:    v_mul_hi_u32 v2, v0, v2
+; GISEL-NEXT:    v_mul_lo_u32 v2, v2, v1
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_srem_i32_24bit:
@@ -711,56 +703,40 @@ define <2 x i32> @v_srem_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
 ; GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
 ; GISEL-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
 ; GISEL-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
-; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
-; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
-; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
-; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
-; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v5
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
-; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v2
-; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v3
-; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
-; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v5
-; GISEL-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
-; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v5
-; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v8
-; GISEL-NEXT:    v_mul_hi_u32 v7, v5, v7
-; GISEL-NEXT:    v_mul_hi_u32 v9, v8, v9
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
-; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v5
-; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
-; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v2
-; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v3
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
-; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
-; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v2
+; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v3
+; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
+; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v4
+; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v6
+; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
+; GISEL-NEXT:    v_mul_hi_u32 v7, v6, v7
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
+; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v4
+; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
+; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v2
+; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v3
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
+; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
+; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
-; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
+; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_srem_v2i32_24bit:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
index ee7a040e41fd5..1f4448d9a632a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -3028,253 +3028,226 @@ define <2 x i64> @v_srem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v4
-; GISEL-NEXT:    v_add_i32_e64 v3, s[4:5], 0, 0
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, 0, v1
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v1
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v3
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v3, v1
+; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v4, 0
 ; GISEL-NEXT:    v_sub_i32_e32 v11, vcc, 0, v1
-; GISEL-NEXT:    v_subb_u32_e32 v12, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v4
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
-; GISEL-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
-; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
-; GISEL-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
-; GISEL-NEXT:    v_trunc_f32_e32 v9, v7
-; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v9
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v10, v5
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v13, v9
+; GISEL-NEXT:    v_mac_f32_e32 v3, 0x4f800000, v4
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; GISEL-NEXT:    v_subb_u32_e64 v12, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
+; GISEL-NEXT:    v_mul_f32_e32 v3, 0x5f7ffffc, v3
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v3
+; GISEL-NEXT:    v_trunc_f32_e32 v5, v5
+; GISEL-NEXT:    v_mac_f32_e32 v3, 0xcf800000, v5
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v10, v3
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
 ; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v11, v10, 0
-; GISEL-NEXT:    v_mov_b32_e32 v5, v8
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v11, v13, v[5:6]
-; GISEL-NEXT:    v_mul_lo_u32 v5, v13, v7
-; GISEL-NEXT:    v_mul_hi_u32 v14, v10, v7
+; GISEL-NEXT:    v_mov_b32_e32 v3, v8
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v11, v5, v[3:4]
+; GISEL-NEXT:    v_mul_lo_u32 v3, v5, v7
 ; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v10, v[8:9]
-; GISEL-NEXT:    v_mul_hi_u32 v7, v13, v7
-; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v8
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v14
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v14, v13, v8
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
+; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v7
+; GISEL-NEXT:    v_mul_hi_u32 v7, v5, v7
+; GISEL-NEXT:    v_mul_lo_u32 v13, v10, v8
+; GISEL-NEXT:    v_mul_lo_u32 v14, v5, v8
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v9
 ; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v13, v3
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v14, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
 ; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
-; GISEL-NEXT:    v_mul_hi_u32 v8, v13, v8
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
+; GISEL-NEXT:    v_mul_hi_u32 v8, v5, v8
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v7, v3
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v5
-; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, v13, v7, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v3
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v7, vcc
 ; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v11, v10, 0
-; GISEL-NEXT:    v_mov_b32_e32 v5, v8
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v11, v13, v[5:6]
-; GISEL-NEXT:    v_mul_lo_u32 v5, v13, v7
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, 0, v0
+; GISEL-NEXT:    v_mov_b32_e32 v3, v8
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v11, v5, v[3:4]
+; GISEL-NEXT:    v_mul_lo_u32 v3, v5, v7
+; GISEL-NEXT:    v_and_b32_e32 v11, 0xffffff, v0
 ; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v10, v[8:9]
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v10, v7
-; GISEL-NEXT:    v_mul_hi_u32 v7, v13, v7
+; GISEL-NEXT:    v_mul_hi_u32 v7, v5, v7
 ; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v8
-; GISEL-NEXT:    v_and_b32_e32 v12, 0xffffff, v2
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v9
 ; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v5, v13, v8
+; GISEL-NEXT:    v_mul_lo_u32 v3, v5, v8
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v8
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v9
 ; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
-; GISEL-NEXT:    v_mul_hi_u32 v8, v13, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
+; GISEL-NEXT:    v_mul_hi_u32 v8, v5, v8
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v7, v3
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v8, v3
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v10, v0
-; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v13, v5, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v7, v3, v0
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v3, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v7, 0, v0
 ; GISEL-NEXT:    v_mul_lo_u32 v8, v11, v5
-; GISEL-NEXT:    v_mul_hi_u32 v9, v11, v0
-; GISEL-NEXT:    v_mul_hi_u32 v0, v3, v0
+; GISEL-NEXT:    v_and_b32_e32 v3, 0xffffff, v6
+; GISEL-NEXT:    v_mul_hi_u32 v6, v11, v0
+; GISEL-NEXT:    v_mul_hi_u32 v0, 0, v0
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; GISEL-NEXT:    v_mul_lo_u32 v8, 0, v5
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; GISEL-NEXT:    v_mul_hi_u32 v7, v11, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v9, v3, v5
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
-; GISEL-NEXT:    v_mul_hi_u32 v8, v11, v5
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v8
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v0, v7
-; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
-; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v1, v10, 0
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v0, v6
+; GISEL-NEXT:    v_mul_hi_u32 v9, 0, v5
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v1, v8, 0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v0
-; GISEL-NEXT:    v_mov_b32_e32 v0, v8
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v1, v5, v[0:1]
-; GISEL-NEXT:    v_and_b32_e32 v0, 0xffffff, v6
-; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v11, v7
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v3, v10, v[8:9]
-; GISEL-NEXT:    v_subb_u32_e64 v8, s[4:5], v3, v5, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], v3, v5
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v2, s[4:5], 0, v0
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v0, v2
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v8, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, v6, v9, s[4:5]
-; GISEL-NEXT:    v_subb_u32_e32 v10, vcc, v5, v3, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v9, v0
+; GISEL-NEXT:    v_mov_b32_e32 v0, v6
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v1, v7, v[0:1]
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v0, v3
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], 0, v8, v[6:7]
 ; GISEL-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v4
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v0, v0
-; GISEL-NEXT:    v_sub_i32_e32 v11, vcc, v7, v1
-; GISEL-NEXT:    v_subbrev_u32_e64 v13, s[4:5], 0, v10, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v11, v5
+; GISEL-NEXT:    v_subb_u32_e64 v8, s[4:5], 0, v6, vcc
 ; GISEL-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GISEL-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v0
-; GISEL-NEXT:    v_trunc_f32_e32 v6, v4
-; GISEL-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v6
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v14, v0
-; GISEL-NEXT:    v_sub_i32_e64 v15, s[4:5], 0, v2
-; GISEL-NEXT:    v_subb_u32_e64 v16, s[4:5], 0, v3, s[4:5]
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v15, v14, 0
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v17, v6
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v13, v3
+; GISEL-NEXT:    v_trunc_f32_e32 v9, v4
+; GISEL-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v9
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v10, v0
+; GISEL-NEXT:    v_sub_i32_e64 v11, s[4:5], 0, v3
+; GISEL-NEXT:    v_subb_u32_e64 v12, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v11, v10, 0
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v9, v9
+; GISEL-NEXT:    v_sub_i32_e64 v13, s[4:5], 0, v6
 ; GISEL-NEXT:    v_mov_b32_e32 v0, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, -1, s[4:5]
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v15, v17, v[0:1]
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v11, v1
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v11, v9, v[0:1]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v1
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[4:5]
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v16, v14, v[5:6]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v13, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, v18, v0, s[4:5]
-; GISEL-NEXT:    v_mul_lo_u32 v0, v17, v4
-; GISEL-NEXT:    v_mul_lo_u32 v18, v14, v5
-; GISEL-NEXT:    v_mul_hi_u32 v19, v14, v4
-; GISEL-NEXT:    v_subb_u32_e32 v10, vcc, v10, v3, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v19
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v12, v10, v[5:6]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, -1, v0, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v0, v9, v4
+; GISEL-NEXT:    v_mul_lo_u32 v6, v10, v5
+; GISEL-NEXT:    v_mul_hi_u32 v15, v10, v4
+; GISEL-NEXT:    v_subbrev_u32_e32 v13, vcc, 0, v13, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v15
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v19, v17, v5
-; GISEL-NEXT:    v_mul_hi_u32 v4, v17, v4
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v18, v0
-; GISEL-NEXT:    v_mul_hi_u32 v18, v14, v5
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v19, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v18, vcc, v19, v18
-; GISEL-NEXT:    v_mul_hi_u32 v5, v17, v5
+; GISEL-NEXT:    v_mul_lo_u32 v15, v9, v5
+; GISEL-NEXT:    v_mul_hi_u32 v4, v9, v4
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
+; GISEL-NEXT:    v_mul_hi_u32 v6, v10, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v15, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v15, v6
+; GISEL-NEXT:    v_mul_hi_u32 v5, v9, v5
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v18, v4
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
 ; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v0
-; GISEL-NEXT:    v_addc_u32_e32 v17, vcc, v17, v4, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v15, v14, 0
-; GISEL-NEXT:    v_sub_i32_e32 v18, vcc, v11, v1
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v0
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v9, v4, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v11, v10, 0
+; GISEL-NEXT:    v_sub_i32_e32 v15, vcc, v7, v1
 ; GISEL-NEXT:    v_mov_b32_e32 v0, v5
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v15, v17, v[0:1]
-; GISEL-NEXT:    v_subbrev_u32_e32 v10, vcc, 0, v10, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v16, v14, v[0:1]
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
-; GISEL-NEXT:    v_cndmask_b32_e32 v5, v11, v18, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v6, v13, v10, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v7, v5, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v7, v8, v6, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v5, v17, v4
-; GISEL-NEXT:    v_mul_lo_u32 v6, v14, v0
-; GISEL-NEXT:    v_mul_hi_u32 v9, v14, v4
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, 0, v12
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v11, v9, v[0:1]
+; GISEL-NEXT:    v_subbrev_u32_e32 v13, vcc, 0, v13, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v12, v10, v[5:6]
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v15, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v13
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, -1, v16, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v15, v1
+; GISEL-NEXT:    v_subbrev_u32_e32 v6, vcc, 0, v13, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v4
+; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v5
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    v_mul_hi_u32 v0, v10, v4
+; GISEL-NEXT:    v_mul_hi_u32 v4, v9, v4
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v0, s[4:5], v11, v0
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v5
+; GISEL-NEXT:    v_add_i32_e64 v0, s[4:5], v12, v0
+; GISEL-NEXT:    v_mul_hi_u32 v12, v10, v5
+; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v11, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; GISEL-NEXT:    v_mul_hi_u32 v5, v9, v5
+; GISEL-NEXT:    v_add_i32_e64 v0, s[4:5], v4, v0
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v11, v4
+; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v5, v4
+; GISEL-NEXT:    v_add_i32_e64 v0, s[4:5], v10, v0
+; GISEL-NEXT:    v_addc_u32_e64 v4, s[4:5], v9, v4, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v5, 0, v0
+; GISEL-NEXT:    v_mul_lo_u32 v9, v2, v4
+; GISEL-NEXT:    v_cndmask_b32_e32 v10, v13, v6, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v6, v2, v0
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v15, v1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; GISEL-NEXT:    v_mul_lo_u32 v9, 0, v4
+; GISEL-NEXT:    v_mul_hi_u32 v0, 0, v0
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; GISEL-NEXT:    v_mul_hi_u32 v6, v2, v4
 ; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v9, v17, v0
-; GISEL-NEXT:    v_mul_hi_u32 v4, v17, v4
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; GISEL-NEXT:    v_mul_hi_u32 v6, v14, v0
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v9, v6
-; GISEL-NEXT:    v_mul_hi_u32 v0, v17, v0
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v5
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v14, v4
-; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v17, v0, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v6, v3, v4
-; GISEL-NEXT:    v_mul_lo_u32 v9, v8, v5
-; GISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0, v1
-; GISEL-NEXT:    v_mul_hi_u32 v1, v8, v4
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v6, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v6, v3, v5
-; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v9, v1
-; GISEL-NEXT:    v_mul_hi_u32 v9, v8, v5
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
 ; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v4, v1
-; GISEL-NEXT:    v_mul_hi_u32 v10, v3, v5
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v2, v9, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v6, v1
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v10, v1
-; GISEL-NEXT:    v_mov_b32_e32 v1, v5
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v2, v6, v[1:2]
-; GISEL-NEXT:    v_subrev_i32_e32 v1, vcc, 0, v7
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v3, v9, v[5:6]
-; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v8, v4
-; GISEL-NEXT:    v_subb_u32_e64 v6, s[4:5], v3, v5, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], v3, v5
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v2
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v6, v3
-; GISEL-NEXT:    v_subb_u32_e32 v5, vcc, v5, v3, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, v7, v8, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, v4, v2
-; GISEL-NEXT:    v_subbrev_u32_e64 v9, s[4:5], 0, v5, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v2
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v9, v3
-; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v5, v3, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v8, v2
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, v10, v11, s[4:5]
-; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v8, v2, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v9, v3, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
-; GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, 0, v2
-; GISEL-NEXT:    v_subrev_i32_e32 v3, vcc, 0, v3
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v11, 0, v4
+; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v3, v9, 0
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v11, v0
+; GISEL-NEXT:    v_mov_b32_e32 v0, v5
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v3, v6, v[0:1]
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v14
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v7, v1, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], 0, v9, v[5:6]
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v8, v10, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v4
+; GISEL-NEXT:    v_subb_u32_e64 v4, s[4:5], 0, v5, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], 0, v5
+; GISEL-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v2, v3
+; GISEL-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v3
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v8, -1, v8, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v7, v3
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v4
+; GISEL-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, -1, v6, s[4:5]
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v7, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v9, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v4, v5, vcc
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_srem_v2i64_24bit:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/widen-i8-i16-scalar-loads.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/widen-i8-i16-scalar-loads.ll
index 9cd85553eb7b6..6730df000e3b8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/widen-i8-i16-scalar-loads.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/widen-i8-i16-scalar-loads.ll
@@ -380,20 +380,20 @@ define amdgpu_kernel void @constant_zextload_i8_align2(ptr addrspace(1) %out, pt
 ; GFX8-LABEL: constant_zextload_i8_align2:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX8-NEXT:    v_mov_b32_e32 v5, 0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s3
-; GFX8-NEXT:    flat_load_ubyte v2, v[0:1]
-; GFX8-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8-NEXT:    flat_load_ubyte v4, v[0:1]
 ; GFX8-NEXT:    s_add_u32 s2, s0, 2
+; GFX8-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX8-NEXT:    s_addc_u32 s3, s1, 0
+; GFX8-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8-NEXT:    v_mov_b32_e32 v3, s3
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    flat_store_short v[0:1], v2
-; GFX8-NEXT:    v_mov_b32_e32 v0, s2
-; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX8-NEXT:    v_mov_b32_e32 v1, s3
-; GFX8-NEXT:    flat_store_short v[0:1], v3
+; GFX8-NEXT:    flat_store_short v[0:1], v4
+; GFX8-NEXT:    flat_store_short v[2:3], v5
 ; GFX8-NEXT:    s_endpgm
 ;
 ; GFX9-LABEL: constant_zextload_i8_align2:
@@ -404,7 +404,7 @@ define amdgpu_kernel void @constant_zextload_i8_align2(ptr addrspace(1) %out, pt
 ; GFX9-NEXT:    global_load_ubyte v1, v0, s[2:3]
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    global_store_short v0, v1, s[0:1]
-; GFX9-NEXT:    global_store_short_d16_hi v0, v1, s[0:1] offset:2
+; GFX9-NEXT:    global_store_short v0, v0, s[0:1] offset:2
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: constant_zextload_i8_align2:
@@ -415,7 +415,7 @@ define amdgpu_kernel void @constant_zextload_i8_align2(ptr addrspace(1) %out, pt
 ; GFX10-NEXT:    global_load_ubyte v1, v0, s[2:3]
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    global_store_short v0, v1, s[0:1]
-; GFX10-NEXT:    global_store_short_d16_hi v0, v1, s[0:1] offset:2
+; GFX10-NEXT:    global_store_short v0, v0, s[0:1] offset:2
 ; GFX10-NEXT:    s_endpgm
   %load = load i8, ptr addrspace(1) %in, align 2
   %zextload = zext i8 %load to i32
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-cs.ll b/llvm/test/CodeGen/AMDGPU/amdpal-cs.ll
index a3fd2a942bc2b..0818f607da0a5 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-cs.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-cs.ll
@@ -8,7 +8,7 @@
 ; GCN-NEXT: amdpal.pipelines:
 ; GCN-NEXT:   - .hardware_stages:
 ; GCN-NEXT:       .cs:
-; GCN-NEXT:         .entry_point:    cs_amdpal
+; GCN-NEXT:         .entry_point_symbol:    cs_amdpal
 ; GCN-NEXT:         .scratch_memory_size: 0
 ; GCN:     .registers:
 ; GCN-NEXT:       '0x2e12 (COMPUTE_PGM_RSRC1)':
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-es.ll b/llvm/test/CodeGen/AMDGPU/amdpal-es.ll
index 657fe80be04da..e37d22c7df372 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-es.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-es.ll
@@ -7,7 +7,7 @@
 ; GCN-NEXT: amdpal.pipelines:
 ; GCN-NEXT:   - .hardware_stages:
 ; GCN-NEXT:       .es:
-; GCN-NEXT:         .entry_point:    es_amdpal
+; GCN-NEXT:         .entry_point_symbol:    es_amdpal
 ; GCN-NEXT:         .scratch_memory_size: 0
 ; GCN:     .registers:
 ; GCN-NEXT:       '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-gs.ll b/llvm/test/CodeGen/AMDGPU/amdpal-gs.ll
index 9f5eb3927c489..d847f75a5c093 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-gs.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-gs.ll
@@ -8,7 +8,7 @@
 ; GCN-NEXT: amdpal.pipelines:
 ; GCN-NEXT:   - .hardware_stages:
 ; GCN-NEXT:       .gs:
-; GCN-NEXT:         .entry_point:    gs_amdpal
+; GCN-NEXT:         .entry_point_symbol:    gs_amdpal
 ; GCN-NEXT:         .scratch_memory_size: 0
 ; GCN:     .registers:
 ; GCN-NEXT:       '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-hs.ll b/llvm/test/CodeGen/AMDGPU/amdpal-hs.ll
index 7eacedf44d09d..74f5f440c99d7 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-hs.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-hs.ll
@@ -8,7 +8,7 @@
 ; GCN-NEXT: amdpal.pipelines:
 ; GCN-NEXT:   - .hardware_stages:
 ; GCN-NEXT:       .hs:
-; GCN-NEXT:         .entry_point:    hs_amdpal
+; GCN-NEXT:         .entry_point_symbol:    hs_amdpal
 ; GCN-NEXT:         .scratch_memory_size: 0
 ; GCN:     .registers:
 ; GCN-NEXT:       '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-ls.ll b/llvm/test/CodeGen/AMDGPU/amdpal-ls.ll
index 973eb561a9a3d..287cc1201a3c5 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-ls.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-ls.ll
@@ -7,7 +7,7 @@
 ; GCN-NEXT: amdpal.pipelines:
 ; GCN-NEXT:   - .hardware_stages:
 ; GCN-NEXT:       .ls:
-; GCN-NEXT:         .entry_point:    ls_amdpal
+; GCN-NEXT:         .entry_point_symbol:    ls_amdpal
 ; GCN-NEXT:         .scratch_memory_size: 0
 ; GCN:     .registers:
 ; GCN-NEXT:       '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-psenable.ll b/llvm/test/CodeGen/AMDGPU/amdpal-psenable.ll
index ace21207a7eba..e1767182c359d 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-psenable.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-psenable.ll
@@ -11,7 +11,7 @@
 ; GCN-NEXT: amdpal.pipelines:
 ; GCN-NEXT:   - .hardware_stages:
 ; GCN-NEXT:       .ps:
-; GCN-NEXT:         .entry_point:    amdpal_psenable
+; GCN-NEXT:         .entry_point_symbol:    amdpal_psenable
 ; GCN-NEXT:         .scratch_memory_size: 0
 ; GCN:     .registers:
 ; GCN-NEXT:       '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)':
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-vs.ll b/llvm/test/CodeGen/AMDGPU/amdpal-vs.ll
index e554bb8980cec..b225d978601ab 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-vs.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-vs.ll
@@ -8,7 +8,7 @@
 ; GCN-NEXT: amdpal.pipelines:
 ; GCN-NEXT:   - .hardware_stages:
 ; GCN-NEXT:       .vs:
-; GCN-NEXT:         .entry_point:    vs_amdpal
+; GCN-NEXT:         .entry_point_symbol:    vs_amdpal
 ; GCN-NEXT:         .scratch_memory_size: 0
 ; GCN:     .registers:
 ; GCN-NEXT:       '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal.ll b/llvm/test/CodeGen/AMDGPU/amdpal.ll
index 0ec5aeb24b423..97fcf0606b5b7 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal.ll
@@ -86,7 +86,7 @@ declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32,
 ; PAL-NEXT: amdpal.pipelines:
 ; PAL-NEXT:   - .hardware_stages:
 ; PAL-NEXT:       .cs:
-; PAL-NEXT:         .entry_point:    scratch2_cs
+; PAL-NEXT:         .entry_point_symbol:    scratch2_cs
 ; PAL-NEXT:         .scratch_memory_size: 0x10
 ; PAL-NEXT:         .sgpr_count:     0x
 ; PAL-NEXT:         .vgpr_count:     0x
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
index ff8a490950a11..1d49e005234e3 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
@@ -12,9 +12,9 @@
 ; ALL-NEXT:     .amdhsa_next_free_sgpr (max(kernel.numbered_sgpr+(extrasgprs(kernel.uses_vcc, kernel.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(kernel.uses_vcc, kernel.uses_flat_scratch, 1))
 ; GFX90A-NEXT:  .amdhsa_accum_offset ((((((alignto(max(1, kernel.num_vgpr), 4))/4)-1)&(~65536))&63)+1)*4
 
-; ALL:       .set kernel.num_vgpr, max(41, aliasee_default.num_vgpr)
-; ALL-NEXT:  .set kernel.num_agpr, max(0, aliasee_default.num_agpr)
-; ALL-NEXT:  .set kernel.numbered_sgpr, max(33, aliasee_default.numbered_sgpr)
+; ALL:       .set kernel.num_vgpr, max(41, .Laliasee_default.num_vgpr)
+; ALL-NEXT:  .set kernel.num_agpr, max(0, .Laliasee_default.num_agpr)
+; ALL-NEXT:  .set kernel.numbered_sgpr, max(33, .Laliasee_default.numbered_sgpr)
 define amdgpu_kernel void @kernel() #0 {
 bb:
   call void @alias() #2
@@ -26,9 +26,9 @@ bb:
   call void asm sideeffect "; clobber a26 ", "~{a26}"()
   ret void
 }
-; ALL:      .set aliasee_default.num_vgpr, 0
-; ALL-NEXT: .set aliasee_default.num_agpr, 27
-; ALL-NEXT: .set aliasee_default.numbered_sgpr, 32
+; ALL:      .set .Laliasee_default.num_vgpr, 0
+; ALL-NEXT: .set .Laliasee_default.num_agpr, 27
+; ALL-NEXT: .set .Laliasee_default.numbered_sgpr, 32
 
 attributes #0 = { noinline norecurse nounwind optnone }
 attributes #1 = { noinline norecurse nounwind readnone willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll
index fdd37bb299807..f719f50ef6f13 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll
@@ -7,18 +7,18 @@
 @alias0 = hidden alias void (), ptr @aliasee_default_vgpr64_sgpr102
 
 ; CHECK-LABEL: {{^}}kernel0:
-; CHECK:      .set kernel0.num_vgpr, max(41, aliasee_default_vgpr64_sgpr102.num_vgpr)
-; CHECK-NEXT: .set kernel0.num_agpr, max(0, aliasee_default_vgpr64_sgpr102.num_agpr)
-; CHECK-NEXT: .set kernel0.numbered_sgpr, max(33, aliasee_default_vgpr64_sgpr102.numbered_sgpr)
+; CHECK:      .set kernel0.num_vgpr, max(41, .Laliasee_default_vgpr64_sgpr102.num_vgpr)
+; CHECK-NEXT: .set kernel0.num_agpr, max(0, .Laliasee_default_vgpr64_sgpr102.num_agpr)
+; CHECK-NEXT: .set kernel0.numbered_sgpr, max(33, .Laliasee_default_vgpr64_sgpr102.numbered_sgpr)
 define amdgpu_kernel void @kernel0() #0 {
 bb:
   call void @alias0() #2
   ret void
 }
 
-; CHECK:      .set aliasee_default_vgpr64_sgpr102.num_vgpr, 53
-; CHECK-NEXT: .set aliasee_default_vgpr64_sgpr102.num_agpr, 0
-; CHECK-NEXT: .set aliasee_default_vgpr64_sgpr102.numbered_sgpr, 32
+; CHECK:      .set .Laliasee_default_vgpr64_sgpr102.num_vgpr, 53
+; CHECK-NEXT: .set .Laliasee_default_vgpr64_sgpr102.num_agpr, 0
+; CHECK-NEXT: .set .Laliasee_default_vgpr64_sgpr102.numbered_sgpr, 32
 define internal void @aliasee_default_vgpr64_sgpr102() #1 {
 bb:
   call void asm sideeffect "; clobber v52 ", "~{v52}"()
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
index 3b08960d164a6..cbc8e7882c45e 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
@@ -12,9 +12,9 @@
 ; CHECK:      .amdhsa_next_free_vgpr max(totalnumvgprs(kernel1.num_agpr, kernel1.num_vgpr), 1, 0)
 ; CHECK-NEXT: .amdhsa_next_free_sgpr (max(kernel1.numbered_sgpr+(extrasgprs(kernel1.uses_vcc, kernel1.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(kernel1.uses_vcc, kernel1.uses_flat_scratch, 1))
 
-; CHECK:      .set kernel1.num_vgpr, max(42, aliasee_vgpr32_sgpr76.num_vgpr)
-; CHECK-NEXT: .set kernel1.num_agpr, max(0, aliasee_vgpr32_sgpr76.num_agpr)
-; CHECK-NEXT: .set kernel1.numbered_sgpr, max(33, aliasee_vgpr32_sgpr76.numbered_sgpr)
+; CHECK:      .set kernel1.num_vgpr, max(42, .Laliasee_vgpr32_sgpr76.num_vgpr)
+; CHECK-NEXT: .set kernel1.num_agpr, max(0, .Laliasee_vgpr32_sgpr76.num_agpr)
+; CHECK-NEXT: .set kernel1.numbered_sgpr, max(33, .Laliasee_vgpr32_sgpr76.numbered_sgpr)
 define amdgpu_kernel void @kernel1() #0 {
 bb:
   call void asm sideeffect "; clobber v40 ", "~{v40}"()
@@ -22,9 +22,9 @@ bb:
   ret void
 }
 
-; CHECK:      .set aliasee_vgpr32_sgpr76.num_vgpr, 27
-; CHECK-NEXT: .set aliasee_vgpr32_sgpr76.num_agpr, 0
-; CHECK-NEXT: .set aliasee_vgpr32_sgpr76.numbered_sgpr, 32
+; CHECK:      .set .Laliasee_vgpr32_sgpr76.num_vgpr, 27
+; CHECK-NEXT: .set .Laliasee_vgpr32_sgpr76.num_agpr, 0
+; CHECK-NEXT: .set .Laliasee_vgpr32_sgpr76.numbered_sgpr, 32
 define internal void @aliasee_vgpr32_sgpr76() #1 {
 bb:
   call void asm sideeffect "; clobber v26 ", "~{v26}"()
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll
index b044e0a716799..cdefbab93c62d 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll
@@ -10,18 +10,18 @@
 ; CHECK:      .amdhsa_next_free_vgpr max(totalnumvgprs(kernel2.num_agpr, kernel2.num_vgpr), 1, 0)
 ; CHECK-NEXT: .amdhsa_next_free_sgpr (max(kernel2.numbered_sgpr+(extrasgprs(kernel2.uses_vcc, kernel2.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(kernel2.uses_vcc, kernel2.uses_flat_scratch, 1))
 
-; CHECK:      .set kernel2.num_vgpr, max(41, aliasee_vgpr64_sgpr102.num_vgpr)
-; CHECK-NEXT: .set kernel2.num_agpr, max(0, aliasee_vgpr64_sgpr102.num_agpr)
-; CHECK-NEXT: .set kernel2.numbered_sgpr, max(33, aliasee_vgpr64_sgpr102.numbered_sgpr)
+; CHECK:      .set kernel2.num_vgpr, max(41, .Laliasee_vgpr64_sgpr102.num_vgpr)
+; CHECK-NEXT: .set kernel2.num_agpr, max(0, .Laliasee_vgpr64_sgpr102.num_agpr)
+; CHECK-NEXT: .set kernel2.numbered_sgpr, max(33, .Laliasee_vgpr64_sgpr102.numbered_sgpr)
 define amdgpu_kernel void @kernel2() #0 {
 bb:
   call void @alias2() #2
   ret void
 }
 
-; CHECK:      .set aliasee_vgpr64_sgpr102.num_vgpr, 53
-; CHECK-NEXT: .set aliasee_vgpr64_sgpr102.num_agpr, 0
-; CHECK-NEXT: .set aliasee_vgpr64_sgpr102.numbered_sgpr, 32
+; CHECK:      .set .Laliasee_vgpr64_sgpr102.num_vgpr, 53
+; CHECK-NEXT: .set .Laliasee_vgpr64_sgpr102.num_agpr, 0
+; CHECK-NEXT: .set .Laliasee_vgpr64_sgpr102.numbered_sgpr, 32
 define internal void @aliasee_vgpr64_sgpr102() #1 {
 bb:
   call void asm sideeffect "; clobber v52 ", "~{v52}"()
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll
index 264cc4bd190f9..43dd0a7233604 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll
@@ -10,18 +10,18 @@
 ; CHECK:      .amdhsa_next_free_vgpr max(totalnumvgprs(kernel3.num_agpr, kernel3.num_vgpr), 1, 0)
 ; CHECK-NEXT: .amdhsa_next_free_sgpr (max(kernel3.numbered_sgpr+(extrasgprs(kernel3.uses_vcc, kernel3.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(kernel3.uses_vcc, kernel3.uses_flat_scratch, 1))
 
-; CHECK:      .set kernel3.num_vgpr, max(41, aliasee_vgpr256_sgpr102.num_vgpr)
-; CHECK-NEXT: .set kernel3.num_agpr, max(0, aliasee_vgpr256_sgpr102.num_agpr)
-; CHECK-NEXT: .set kernel3.numbered_sgpr, max(33, aliasee_vgpr256_sgpr102.numbered_sgpr)
+; CHECK:      .set kernel3.num_vgpr, max(41, .Laliasee_vgpr256_sgpr102.num_vgpr)
+; CHECK-NEXT: .set kernel3.num_agpr, max(0, .Laliasee_vgpr256_sgpr102.num_agpr)
+; CHECK-NEXT: .set kernel3.numbered_sgpr, max(33, .Laliasee_vgpr256_sgpr102.numbered_sgpr)
 define amdgpu_kernel void @kernel3() #0 {
 bb:
   call void @alias3() #2
   ret void
 }
 
-; CHECK:      .set aliasee_vgpr256_sgpr102.num_vgpr, 253
-; CHECK-NEXT: .set aliasee_vgpr256_sgpr102.num_agpr, 0
-; CHECK-NEXT: .set aliasee_vgpr256_sgpr102.numbered_sgpr, 33
+; CHECK:      .set .Laliasee_vgpr256_sgpr102.num_vgpr, 253
+; CHECK-NEXT: .set .Laliasee_vgpr256_sgpr102.num_agpr, 0
+; CHECK-NEXT: .set .Laliasee_vgpr256_sgpr102.numbered_sgpr, 33
 define internal void @aliasee_vgpr256_sgpr102() #1 {
 bb:
   call void asm sideeffect "; clobber v252 ", "~{v252}"()
diff --git a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll
index fc89615059152..cdea4fd158b04 100644
--- a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll
+++ b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll
@@ -355,7 +355,7 @@ define amdgpu_kernel void @vadd64ri(ptr addrspace(1) %out) {
 ; GFX1010-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1010-NEXT:    v_add_co_u32 v0, s2, 0x56789876, v0
 ; GFX1010-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1010-NEXT:    v_add_co_ci_u32_e64 v1, s2, 0, 0x1234, s2
+; GFX1010-NEXT:    v_add_co_ci_u32_e64 v1, s2, 0x1234, 0, s2
 ; GFX1010-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1010-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX1010-NEXT:    s_endpgm
@@ -365,7 +365,7 @@ define amdgpu_kernel void @vadd64ri(ptr addrspace(1) %out) {
 ; GFX1030W32-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1030W32-NEXT:    v_add_co_u32 v0, s2, 0x56789876, v0
 ; GFX1030W32-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1030W32-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, 0x1234, s2
+; GFX1030W32-NEXT:    v_add_co_ci_u32_e64 v1, null, 0x1234, 0, s2
 ; GFX1030W32-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1030W32-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX1030W32-NEXT:    s_endpgm
@@ -375,7 +375,7 @@ define amdgpu_kernel void @vadd64ri(ptr addrspace(1) %out) {
 ; GFX1030W64-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1030W64-NEXT:    v_add_co_u32 v0, s[2:3], 0x56789876, v0
 ; GFX1030W64-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1030W64-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, 0x1234, s[2:3]
+; GFX1030W64-NEXT:    v_add_co_ci_u32_e64 v1, null, 0x1234, 0, s[2:3]
 ; GFX1030W64-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1030W64-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX1030W64-NEXT:    s_endpgm
@@ -387,7 +387,7 @@ define amdgpu_kernel void @vadd64ri(ptr addrspace(1) %out) {
 ; GFX11-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_add_co_u32 v0, s2, 0x56789876, v0
-; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, 0x1234, s2
+; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, 0x1234, 0, s2
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
 ; GFX11-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/cmp_shrink.mir b/llvm/test/CodeGen/AMDGPU/cmp_shrink.mir
index 9b3579b43a38a..ae3fa153f381a 100644
--- a/llvm/test/CodeGen/AMDGPU/cmp_shrink.mir
+++ b/llvm/test/CodeGen/AMDGPU/cmp_shrink.mir
@@ -7,6 +7,6 @@ name:             not_shrink_icmp
 body:             |
   bb.0:
     ; GCN-LABEL: name: not_shrink_icmp
-    ; GCN: S_CMP_GT_I32 1, 65, implicit-def $scc
+    ; GCN: S_CMP_LT_I32 65, 1, implicit-def $scc
     S_CMP_GT_I32 1, 65, implicit-def $scc
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/commute-op-sel.mir b/llvm/test/CodeGen/AMDGPU/commute-op-sel.mir
index b9397f9d5d4dd..9274c995dde92 100644
--- a/llvm/test/CodeGen/AMDGPU/commute-op-sel.mir
+++ b/llvm/test/CodeGen/AMDGPU/commute-op-sel.mir
@@ -1,12 +1,11 @@
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -run-pass=machine-cse -verify-machineinstrs %s -o - 2>&1 | FileCheck --check-prefix=GCN %s
 
-# GCN-LABEL: name: test_machine_cse_op_sel
-# GCN: %2:vgpr_32 = V_ADD_NC_U16_e64 0, %0, 0, %1, 1, 0, implicit $mode, implicit $exec
-# GCN: %3:vgpr_32 = V_ADD_NC_U16_e64 0, %1, 0, %0, 1, 0, implicit $mode, implicit $exec
-# GCN: DS_WRITE2_B32_gfx9 undef %4:vgpr_32, %2, %3, 0, 1, 0, implicit $exec
 ---
-name: test_machine_cse_op_sel
+name: test_machine_cse_op_sel_v_add_nc_u16
 body: |
+  ; GCN-LABEL: name: test_machine_cse_op_sel_v_add_nc_u16
+  ; GCN: %2:vgpr_32 = V_ADD_NC_U16_e64 0, %0, 0, %1, 1, 0, implicit $mode, implicit $exec
+  ; GCN-NEXT: DS_WRITE2_B32_gfx9 undef %4:vgpr_32, %2, %2, 0, 1, 0, implicit $exec
   bb.0:
     %0:vgpr_32 = IMPLICIT_DEF
     %1:vgpr_32 = IMPLICIT_DEF
@@ -15,3 +14,110 @@ body: |
     DS_WRITE2_B32_gfx9 undef %4:vgpr_32, %2, %3, 0, 1, 0, implicit $exec
 ...
 
+---
+name: test_machine_cse_op_sel_const_v_add_nc_u16
+body: |
+  ; GCN-LABEL: name: test_machine_cse_op_sel_const_v_add_nc_u16
+  ; GCN: %0:vgpr_32 = V_ADD_NC_U16_e64 0, 64, 0, -3, 1, 0, implicit $mode, implicit $exec
+  ; GCN-NEXT: DS_WRITE2_B32_gfx9 undef %2:vgpr_32, %0, %0, 0, 1, 0, implicit $exec
+  bb.0:
+    %1:vgpr_32 = V_ADD_NC_U16_e64 0, 64, 0, -3, 1, 0, implicit $mode, implicit $exec
+    %2:vgpr_32 = V_ADD_NC_U16_e64 0, -3, 0, 64, 1, 0, implicit $mode, implicit $exec
+    DS_WRITE2_B32_gfx9 undef %4:vgpr_32, %1, %2, 0, 1, 0, implicit $exec
+...
+
+---
+name: test_machine_cse_op_sel_v_fma_f16
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: test_machine_cse_op_sel_v_fma_f16
+  ; GCN: %3:vgpr_32 = nofpexcept V_FMA_F16_e64 0, %0, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
+  ; GCN-NEXT: DS_WRITE2_B32_gfx9 undef %5:vgpr_32, %3, %3, 0, 1, 0, implicit $exec
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = COPY $vgpr2
+    %3:vgpr_32 = nofpexcept V_FMA_F16_e64 0, %0, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
+    %4:vgpr_32 = nofpexcept V_FMA_F16_e64 0, %1, 0, %0, 0, %2, 0, 0, implicit $mode, implicit $exec
+    DS_WRITE2_B32_gfx9 undef %5:vgpr_32, %3, %4, 0, 1, 0, implicit $exec
+...
+
+---
+name: test_machine_cse_op_sel_const_v_fma_f16
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: test_machine_cse_op_sel_const_v_fma_f16
+  ; GCN: %1:vgpr_32 = nofpexcept V_FMA_F16_e64 0, 3481272320, 0, 1, 0, %0, 0, 0, implicit $mode, implicit $exec
+  ; GCN-NEXT: DS_WRITE2_B32_gfx9 undef %3:vgpr_32, %1, %1, 0, 1, 0, implicit $exec
+  bb.0:
+    liveins: $vgpr0
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = nofpexcept V_FMA_F16_e64 0, 1, 0, 3481272320, 0, %0, 0, 0, implicit $mode, implicit $exec
+    %2:vgpr_32 = nofpexcept V_FMA_F16_e64 0, 3481272320, 0, 1, 0, %0, 0, 0, implicit $mode, implicit $exec
+    DS_WRITE2_B32_gfx9 undef %3:vgpr_32, %1, %2, 0, 1, 0, implicit $exec
+...
+
+---
+name: test_machine_cse_op_sel_v_mad_u16
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: test_machine_cse_op_sel_v_mad_u16
+  ; GCN: %3:vgpr_32 = V_MAD_U32_U16_e64 0, %0, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
+  ; GCN-NEXT: DS_WRITE2_B32_gfx9 undef %5:vgpr_32, %3, %3, 0, 1, 0, implicit $exec
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = COPY $vgpr2
+    %3:vgpr_32 = V_MAD_U32_U16_e64 0, %0, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
+    %4:vgpr_32 = V_MAD_U32_U16_e64 0, %1, 0, %0, 0, %2, 0, 0, implicit $mode, implicit $exec
+    DS_WRITE2_B32_gfx9 undef %5:vgpr_32, %3, %4, 0, 1, 0, implicit $exec
+...
+
+---
+name: test_machine_cse_op_sel_const_v_mad_u16
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: test_machine_cse_op_sel_const_v_mad_u16
+  ; GCN: %1:vgpr_32 = V_MAD_U32_U16_e64 0, 1, 0, 64, 0, %0, 0, 0, implicit $mode, implicit $exec
+  ; GCN-NEXT: DS_WRITE2_B32_gfx9 undef %3:vgpr_32, %1, %1, 0, 1, 0, implicit $exec
+  bb.0:
+    liveins: $vgpr0
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_MAD_U32_U16_e64 0, 1, 0, 64, 0, %0, 0, 0, implicit $mode, implicit $exec
+    %2:vgpr_32 = V_MAD_U32_U16_e64 0, 64, 0, 1, 0, %0, 0, 0, implicit $mode, implicit $exec
+    DS_WRITE2_B32_gfx9 undef %3:vgpr_32, %1, %2, 0, 1, 0, implicit $exec
+...
+
+---
+name: test_machine_cse_op_sel_v_mad_i16
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: test_machine_cse_op_sel_v_mad_i16
+  ; GCN: %3:vgpr_32 = V_MAD_I32_I16_e64 0, %0, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
+  ; GCN-NEXT: DS_WRITE2_B32_gfx9 undef %5:vgpr_32, %3, %3, 0, 1, 0, implicit $exec
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = COPY $vgpr2
+    %3:vgpr_32 = V_MAD_I32_I16_e64 0, %0, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
+    %4:vgpr_32 = V_MAD_I32_I16_e64 0, %1, 0, %0, 0, %2, 0, 0, implicit $mode, implicit $exec
+    DS_WRITE2_B32_gfx9 undef %5:vgpr_32, %3, %4, 0, 1, 0, implicit $exec
+...
+
+---
+name: test_machine_cse_op_sel_const_v_mad_i16
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: test_machine_cse_op_sel_const_v_mad_i16
+  ; GCN: %1:vgpr_32 = V_MAD_I32_I16_e64 0, 1, 0, 64, 0, %0, 0, 0, implicit $mode, implicit $exec
+  ; GCN-NEXT: DS_WRITE2_B32_gfx9 undef %3:vgpr_32, %1, %1, 0, 1, 0, implicit $exec
+  bb.0:
+    liveins: $vgpr0
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_MAD_I32_I16_e64 0, 1, 0, 64, 0, %0, 0, 0, implicit $mode, implicit $exec
+    %2:vgpr_32 = V_MAD_I32_I16_e64 0, 64, 0, 1, 0, %0, 0, 0, implicit $mode, implicit $exec
+    DS_WRITE2_B32_gfx9 undef %3:vgpr_32, %1, %2, 0, 1, 0, implicit $exec
+...
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz.ll b/llvm/test/CodeGen/AMDGPU/ctlz.ll
index 3019d4d298eb4..b4d450a90d595 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz.ll
@@ -1566,7 +1566,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
 ; GFX10-GISEL-NEXT:    v_ffbh_u32_e32 v1, v0
 ; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
 ; GFX10-GISEL-NEXT:    v_min_u32_e32 v1, 32, v1
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, v1, 0xffe8
+; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, 0xffe8, v1
 ; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0xffff, vcc_lo
 ; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10-GISEL-NEXT:    global_store_byte v1, v0, s[0:1]
@@ -1807,7 +1807,7 @@ define amdgpu_kernel void @v_ctlz_i7_sel_eq_neg1(ptr addrspace(1) noalias %out,
 ; GFX10-GISEL-NEXT:    v_ffbh_u32_e32 v1, v0
 ; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
 ; GFX10-GISEL-NEXT:    v_min_u32_e32 v1, 32, v1
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, v1, 0xffe7
+; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, 0xffe7, v1
 ; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0x7f, vcc_lo
 ; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, 0x7f, v0
diff --git a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
index b897e1feed5d5..fec020a296b9b 100644
--- a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
+++ b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
@@ -1657,8 +1657,8 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(ptr addrspace(1) noalias %o
 ; GFX10-NEXT:    v_or_b32_sdwa v2, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
 ; GFX10-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX10-NEXT:    v_cvt_f32_ubyte3_e32 v3, v0
-; GFX10-NEXT:    v_add_nc_u16 v1, v1, 0x900
-; GFX10-NEXT:    v_add_nc_u16 v5, v2, 0x900
+; GFX10-NEXT:    v_add_nc_u16 v1, 0x900, v1
+; GFX10-NEXT:    v_add_nc_u16 v5, 0x900, v2
 ; GFX10-NEXT:    v_cvt_f32_ubyte2_e32 v2, v0
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v6, 16, v1
 ; GFX10-NEXT:    v_cvt_f32_ubyte1_e32 v1, v0
@@ -1723,10 +1723,10 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(ptr addrspace(1) noalias %o
 ; GFX11-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_or_b32_e32 v1, v1, v3
-; GFX11-NEXT:    v_add_nc_u16 v2, v2, 0x900
+; GFX11-NEXT:    v_add_nc_u16 v2, 0x900, v2
 ; GFX11-NEXT:    v_cvt_f32_ubyte3_e32 v3, v0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_add_nc_u16 v1, v1, 0x900
+; GFX11-NEXT:    v_add_nc_u16 v1, 0x900, v1
 ; GFX11-NEXT:    v_and_b32_e32 v5, 0xffff, v2
 ; GFX11-NEXT:    v_cvt_f32_ubyte2_e32 v2, v0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
diff --git a/llvm/test/CodeGen/AMDGPU/elf-notes.ll b/llvm/test/CodeGen/AMDGPU/elf-notes.ll
index 554cb140f4292..e91bed464136f 100644
--- a/llvm/test/CodeGen/AMDGPU/elf-notes.ll
+++ b/llvm/test/CodeGen/AMDGPU/elf-notes.ll
@@ -66,7 +66,7 @@
 ; OSABI-PAL-ELF: amdpal.pipelines:
 ; OSABI-PAL-ELF:   - .hardware_stages:
 ; OSABI-PAL-ELF:       .cs:
-; OSABI-PAL-ELF:         .entry_point:    elf_notes
+; OSABI-PAL-ELF:         .entry_point_symbol:    elf_notes
 ; OSABI-PAL-ELF:         .scratch_memory_size: 0
 ; OSABI-PAL-ELF:         .sgpr_count:     96
 ; OSABI-PAL-ELF:         .vgpr_count:     1
diff --git a/llvm/test/CodeGen/AMDGPU/fcmp.f16.ll b/llvm/test/CodeGen/AMDGPU/fcmp.f16.ll
index b58996d656ece..23b54c6741e51 100644
--- a/llvm/test/CodeGen/AMDGPU/fcmp.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcmp.f16.ll
@@ -2,6 +2,7 @@
 ; RUN:  llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=SI %s
 ; RUN:  llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=VI %s
 ; RUN:  llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GFX11 %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1200 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GFX12 %s
 
 define amdgpu_kernel void @fcmp_f16_lt(
 ; SI-LABEL: fcmp_f16_lt:
@@ -78,6 +79,31 @@ define amdgpu_kernel void @fcmp_f16_lt(
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
 ; GFX11-NEXT:    buffer_store_b32 v0, off, s[8:11], 0
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fcmp_f16_lt:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_clause 0x1
+; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT:    s_mov_b32 s10, -1
+; GFX12-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX12-NEXT:    s_mov_b32 s14, s10
+; GFX12-NEXT:    s_mov_b32 s15, s11
+; GFX12-NEXT:    s_mov_b32 s6, s10
+; GFX12-NEXT:    s_mov_b32 s7, s11
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_mov_b32 s13, s3
+; GFX12-NEXT:    buffer_load_u16 v0, off, s[12:15], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    buffer_load_u16 v1, off, s[4:7], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s8, s0
+; GFX12-NEXT:    s_mov_b32 s9, s1
+; GFX12-NEXT:    v_cmp_lt_f16_e32 vcc_lo, v0, v1
+; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX12-NEXT:    buffer_store_b32 v0, off, s[8:11], null
+; GFX12-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
@@ -166,6 +192,34 @@ define amdgpu_kernel void @fcmp_f16_lt_abs(
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s2
 ; GFX11-NEXT:    buffer_store_b32 v0, off, s[8:11], 0
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fcmp_f16_lt_abs:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_clause 0x1
+; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT:    s_mov_b32 s10, -1
+; GFX12-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX12-NEXT:    s_mov_b32 s14, s10
+; GFX12-NEXT:    s_mov_b32 s15, s11
+; GFX12-NEXT:    s_mov_b32 s6, s10
+; GFX12-NEXT:    s_mov_b32 s7, s11
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_mov_b32 s13, s3
+; GFX12-NEXT:    buffer_load_u16 v0, off, s[12:15], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    buffer_load_u16 v1, off, s[4:7], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s8, s0
+; GFX12-NEXT:    s_mov_b32 s9, s1
+; GFX12-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX12-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_cmp_lt_f16_e32 vcc_lo, v0, v1
+; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX12-NEXT:    buffer_store_b32 v0, off, s[8:11], null
+; GFX12-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
@@ -255,6 +309,31 @@ define amdgpu_kernel void @fcmp_f16_eq(
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
 ; GFX11-NEXT:    buffer_store_b32 v0, off, s[8:11], 0
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fcmp_f16_eq:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_clause 0x1
+; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT:    s_mov_b32 s10, -1
+; GFX12-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX12-NEXT:    s_mov_b32 s14, s10
+; GFX12-NEXT:    s_mov_b32 s15, s11
+; GFX12-NEXT:    s_mov_b32 s6, s10
+; GFX12-NEXT:    s_mov_b32 s7, s11
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_mov_b32 s13, s3
+; GFX12-NEXT:    buffer_load_u16 v0, off, s[12:15], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    buffer_load_u16 v1, off, s[4:7], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s8, s0
+; GFX12-NEXT:    s_mov_b32 s9, s1
+; GFX12-NEXT:    v_cmp_eq_f16_e32 vcc_lo, v0, v1
+; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX12-NEXT:    buffer_store_b32 v0, off, s[8:11], null
+; GFX12-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
@@ -342,6 +421,31 @@ define amdgpu_kernel void @fcmp_f16_le(
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
 ; GFX11-NEXT:    buffer_store_b32 v0, off, s[8:11], 0
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fcmp_f16_le:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_clause 0x1
+; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT:    s_mov_b32 s10, -1
+; GFX12-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX12-NEXT:    s_mov_b32 s14, s10
+; GFX12-NEXT:    s_mov_b32 s15, s11
+; GFX12-NEXT:    s_mov_b32 s6, s10
+; GFX12-NEXT:    s_mov_b32 s7, s11
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_mov_b32 s13, s3
+; GFX12-NEXT:    buffer_load_u16 v0, off, s[12:15], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    buffer_load_u16 v1, off, s[4:7], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s8, s0
+; GFX12-NEXT:    s_mov_b32 s9, s1
+; GFX12-NEXT:    v_cmp_le_f16_e32 vcc_lo, v0, v1
+; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX12-NEXT:    buffer_store_b32 v0, off, s[8:11], null
+; GFX12-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
@@ -429,6 +533,31 @@ define amdgpu_kernel void @fcmp_f16_gt(
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
 ; GFX11-NEXT:    buffer_store_b32 v0, off, s[8:11], 0
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fcmp_f16_gt:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_clause 0x1
+; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT:    s_mov_b32 s10, -1
+; GFX12-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX12-NEXT:    s_mov_b32 s14, s10
+; GFX12-NEXT:    s_mov_b32 s15, s11
+; GFX12-NEXT:    s_mov_b32 s6, s10
+; GFX12-NEXT:    s_mov_b32 s7, s11
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_mov_b32 s13, s3
+; GFX12-NEXT:    buffer_load_u16 v0, off, s[12:15], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    buffer_load_u16 v1, off, s[4:7], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s8, s0
+; GFX12-NEXT:    s_mov_b32 s9, s1
+; GFX12-NEXT:    v_cmp_gt_f16_e32 vcc_lo, v0, v1
+; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX12-NEXT:    buffer_store_b32 v0, off, s[8:11], null
+; GFX12-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
@@ -516,6 +645,31 @@ define amdgpu_kernel void @fcmp_f16_lg(
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
 ; GFX11-NEXT:    buffer_store_b32 v0, off, s[8:11], 0
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fcmp_f16_lg:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_clause 0x1
+; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT:    s_mov_b32 s10, -1
+; GFX12-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX12-NEXT:    s_mov_b32 s14, s10
+; GFX12-NEXT:    s_mov_b32 s15, s11
+; GFX12-NEXT:    s_mov_b32 s6, s10
+; GFX12-NEXT:    s_mov_b32 s7, s11
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_mov_b32 s13, s3
+; GFX12-NEXT:    buffer_load_u16 v0, off, s[12:15], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    buffer_load_u16 v1, off, s[4:7], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s8, s0
+; GFX12-NEXT:    s_mov_b32 s9, s1
+; GFX12-NEXT:    v_cmp_lg_f16_e32 vcc_lo, v0, v1
+; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX12-NEXT:    buffer_store_b32 v0, off, s[8:11], null
+; GFX12-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
@@ -603,6 +757,31 @@ define amdgpu_kernel void @fcmp_f16_ge(
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
 ; GFX11-NEXT:    buffer_store_b32 v0, off, s[8:11], 0
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fcmp_f16_ge:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_clause 0x1
+; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT:    s_mov_b32 s10, -1
+; GFX12-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX12-NEXT:    s_mov_b32 s14, s10
+; GFX12-NEXT:    s_mov_b32 s15, s11
+; GFX12-NEXT:    s_mov_b32 s6, s10
+; GFX12-NEXT:    s_mov_b32 s7, s11
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_mov_b32 s13, s3
+; GFX12-NEXT:    buffer_load_u16 v0, off, s[12:15], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    buffer_load_u16 v1, off, s[4:7], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s8, s0
+; GFX12-NEXT:    s_mov_b32 s9, s1
+; GFX12-NEXT:    v_cmp_ge_f16_e32 vcc_lo, v0, v1
+; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX12-NEXT:    buffer_store_b32 v0, off, s[8:11], null
+; GFX12-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
@@ -690,6 +869,31 @@ define amdgpu_kernel void @fcmp_f16_o(
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
 ; GFX11-NEXT:    buffer_store_b32 v0, off, s[8:11], 0
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fcmp_f16_o:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_clause 0x1
+; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT:    s_mov_b32 s10, -1
+; GFX12-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX12-NEXT:    s_mov_b32 s14, s10
+; GFX12-NEXT:    s_mov_b32 s15, s11
+; GFX12-NEXT:    s_mov_b32 s6, s10
+; GFX12-NEXT:    s_mov_b32 s7, s11
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_mov_b32 s13, s3
+; GFX12-NEXT:    buffer_load_u16 v0, off, s[12:15], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    buffer_load_u16 v1, off, s[4:7], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s8, s0
+; GFX12-NEXT:    s_mov_b32 s9, s1
+; GFX12-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
+; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX12-NEXT:    buffer_store_b32 v0, off, s[8:11], null
+; GFX12-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
@@ -777,6 +981,31 @@ define amdgpu_kernel void @fcmp_f16_u(
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
 ; GFX11-NEXT:    buffer_store_b32 v0, off, s[8:11], 0
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fcmp_f16_u:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_clause 0x1
+; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT:    s_mov_b32 s10, -1
+; GFX12-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX12-NEXT:    s_mov_b32 s14, s10
+; GFX12-NEXT:    s_mov_b32 s15, s11
+; GFX12-NEXT:    s_mov_b32 s6, s10
+; GFX12-NEXT:    s_mov_b32 s7, s11
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_mov_b32 s13, s3
+; GFX12-NEXT:    buffer_load_u16 v0, off, s[12:15], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    buffer_load_u16 v1, off, s[4:7], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s8, s0
+; GFX12-NEXT:    s_mov_b32 s9, s1
+; GFX12-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v1
+; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX12-NEXT:    buffer_store_b32 v0, off, s[8:11], null
+; GFX12-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
@@ -864,6 +1093,31 @@ define amdgpu_kernel void @fcmp_f16_nge(
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
 ; GFX11-NEXT:    buffer_store_b32 v0, off, s[8:11], 0
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fcmp_f16_nge:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_clause 0x1
+; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT:    s_mov_b32 s10, -1
+; GFX12-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX12-NEXT:    s_mov_b32 s14, s10
+; GFX12-NEXT:    s_mov_b32 s15, s11
+; GFX12-NEXT:    s_mov_b32 s6, s10
+; GFX12-NEXT:    s_mov_b32 s7, s11
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_mov_b32 s13, s3
+; GFX12-NEXT:    buffer_load_u16 v0, off, s[12:15], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    buffer_load_u16 v1, off, s[4:7], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s8, s0
+; GFX12-NEXT:    s_mov_b32 s9, s1
+; GFX12-NEXT:    v_cmp_nge_f16_e32 vcc_lo, v0, v1
+; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX12-NEXT:    buffer_store_b32 v0, off, s[8:11], null
+; GFX12-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
@@ -951,6 +1205,31 @@ define amdgpu_kernel void @fcmp_f16_nlg(
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
 ; GFX11-NEXT:    buffer_store_b32 v0, off, s[8:11], 0
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fcmp_f16_nlg:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_clause 0x1
+; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT:    s_mov_b32 s10, -1
+; GFX12-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX12-NEXT:    s_mov_b32 s14, s10
+; GFX12-NEXT:    s_mov_b32 s15, s11
+; GFX12-NEXT:    s_mov_b32 s6, s10
+; GFX12-NEXT:    s_mov_b32 s7, s11
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_mov_b32 s13, s3
+; GFX12-NEXT:    buffer_load_u16 v0, off, s[12:15], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    buffer_load_u16 v1, off, s[4:7], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s8, s0
+; GFX12-NEXT:    s_mov_b32 s9, s1
+; GFX12-NEXT:    v_cmp_nlg_f16_e32 vcc_lo, v0, v1
+; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX12-NEXT:    buffer_store_b32 v0, off, s[8:11], null
+; GFX12-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
@@ -1038,6 +1317,31 @@ define amdgpu_kernel void @fcmp_f16_ngt(
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
 ; GFX11-NEXT:    buffer_store_b32 v0, off, s[8:11], 0
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fcmp_f16_ngt:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_clause 0x1
+; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT:    s_mov_b32 s10, -1
+; GFX12-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX12-NEXT:    s_mov_b32 s14, s10
+; GFX12-NEXT:    s_mov_b32 s15, s11
+; GFX12-NEXT:    s_mov_b32 s6, s10
+; GFX12-NEXT:    s_mov_b32 s7, s11
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_mov_b32 s13, s3
+; GFX12-NEXT:    buffer_load_u16 v0, off, s[12:15], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    buffer_load_u16 v1, off, s[4:7], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s8, s0
+; GFX12-NEXT:    s_mov_b32 s9, s1
+; GFX12-NEXT:    v_cmp_ngt_f16_e32 vcc_lo, v0, v1
+; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX12-NEXT:    buffer_store_b32 v0, off, s[8:11], null
+; GFX12-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
@@ -1125,6 +1429,31 @@ define amdgpu_kernel void @fcmp_f16_nle(
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
 ; GFX11-NEXT:    buffer_store_b32 v0, off, s[8:11], 0
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fcmp_f16_nle:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_clause 0x1
+; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT:    s_mov_b32 s10, -1
+; GFX12-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX12-NEXT:    s_mov_b32 s14, s10
+; GFX12-NEXT:    s_mov_b32 s15, s11
+; GFX12-NEXT:    s_mov_b32 s6, s10
+; GFX12-NEXT:    s_mov_b32 s7, s11
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_mov_b32 s13, s3
+; GFX12-NEXT:    buffer_load_u16 v0, off, s[12:15], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    buffer_load_u16 v1, off, s[4:7], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s8, s0
+; GFX12-NEXT:    s_mov_b32 s9, s1
+; GFX12-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v0, v1
+; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX12-NEXT:    buffer_store_b32 v0, off, s[8:11], null
+; GFX12-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
@@ -1212,6 +1541,31 @@ define amdgpu_kernel void @fcmp_f16_neq(
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
 ; GFX11-NEXT:    buffer_store_b32 v0, off, s[8:11], 0
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fcmp_f16_neq:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_clause 0x1
+; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT:    s_mov_b32 s10, -1
+; GFX12-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX12-NEXT:    s_mov_b32 s14, s10
+; GFX12-NEXT:    s_mov_b32 s15, s11
+; GFX12-NEXT:    s_mov_b32 s6, s10
+; GFX12-NEXT:    s_mov_b32 s7, s11
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_mov_b32 s13, s3
+; GFX12-NEXT:    buffer_load_u16 v0, off, s[12:15], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    buffer_load_u16 v1, off, s[4:7], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s8, s0
+; GFX12-NEXT:    s_mov_b32 s9, s1
+; GFX12-NEXT:    v_cmp_neq_f16_e32 vcc_lo, v0, v1
+; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX12-NEXT:    buffer_store_b32 v0, off, s[8:11], null
+; GFX12-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
@@ -1299,6 +1653,31 @@ define amdgpu_kernel void @fcmp_f16_nlt(
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
 ; GFX11-NEXT:    buffer_store_b32 v0, off, s[8:11], 0
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fcmp_f16_nlt:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_clause 0x1
+; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT:    s_mov_b32 s10, -1
+; GFX12-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX12-NEXT:    s_mov_b32 s14, s10
+; GFX12-NEXT:    s_mov_b32 s15, s11
+; GFX12-NEXT:    s_mov_b32 s6, s10
+; GFX12-NEXT:    s_mov_b32 s7, s11
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_mov_b32 s13, s3
+; GFX12-NEXT:    buffer_load_u16 v0, off, s[12:15], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    buffer_load_u16 v1, off, s[4:7], null scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s8, s0
+; GFX12-NEXT:    s_mov_b32 s9, s1
+; GFX12-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, v0, v1
+; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX12-NEXT:    buffer_store_b32 v0, off, s[8:11], null
+; GFX12-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
@@ -1401,6 +1780,36 @@ define amdgpu_kernel void @fcmp_v2f16_lt(
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
 ; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[8:11], 0
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fcmp_v2f16_lt:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_clause 0x1
+; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT:    s_mov_b32 s10, -1
+; GFX12-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX12-NEXT:    s_mov_b32 s6, s10
+; GFX12-NEXT:    s_mov_b32 s7, s11
+; GFX12-NEXT:    s_mov_b32 s14, s10
+; GFX12-NEXT:    s_mov_b32 s15, s11
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_mov_b32 s13, s3
+; GFX12-NEXT:    buffer_load_b32 v0, off, s[4:7], null
+; GFX12-NEXT:    buffer_load_b32 v1, off, s[12:15], null
+; GFX12-NEXT:    s_mov_b32 s8, s0
+; GFX12-NEXT:    s_mov_b32 s9, s1
+; GFX12-NEXT:    s_wait_loadcnt 0x1
+; GFX12-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX12-NEXT:    v_cmp_lt_f16_e32 vcc_lo, v1, v0
+; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX12-NEXT:    v_cmp_lt_f16_e32 vcc_lo, v3, v2
+; GFX12-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX12-NEXT:    buffer_store_b64 v[0:1], off, s[8:11], null
+; GFX12-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
@@ -1504,6 +1913,36 @@ define amdgpu_kernel void @fcmp_v2f16_eq(
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
 ; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[8:11], 0
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fcmp_v2f16_eq:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_clause 0x1
+; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT:    s_mov_b32 s10, -1
+; GFX12-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX12-NEXT:    s_mov_b32 s6, s10
+; GFX12-NEXT:    s_mov_b32 s7, s11
+; GFX12-NEXT:    s_mov_b32 s14, s10
+; GFX12-NEXT:    s_mov_b32 s15, s11
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_mov_b32 s13, s3
+; GFX12-NEXT:    buffer_load_b32 v0, off, s[4:7], null
+; GFX12-NEXT:    buffer_load_b32 v1, off, s[12:15], null
+; GFX12-NEXT:    s_mov_b32 s8, s0
+; GFX12-NEXT:    s_mov_b32 s9, s1
+; GFX12-NEXT:    s_wait_loadcnt 0x1
+; GFX12-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX12-NEXT:    v_cmp_eq_f16_e32 vcc_lo, v1, v0
+; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX12-NEXT:    v_cmp_eq_f16_e32 vcc_lo, v3, v2
+; GFX12-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX12-NEXT:    buffer_store_b64 v[0:1], off, s[8:11], null
+; GFX12-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
@@ -1606,6 +2045,36 @@ define amdgpu_kernel void @fcmp_v2f16_le(
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
 ; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[8:11], 0
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fcmp_v2f16_le:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_clause 0x1
+; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT:    s_mov_b32 s10, -1
+; GFX12-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX12-NEXT:    s_mov_b32 s6, s10
+; GFX12-NEXT:    s_mov_b32 s7, s11
+; GFX12-NEXT:    s_mov_b32 s14, s10
+; GFX12-NEXT:    s_mov_b32 s15, s11
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_mov_b32 s13, s3
+; GFX12-NEXT:    buffer_load_b32 v0, off, s[4:7], null
+; GFX12-NEXT:    buffer_load_b32 v1, off, s[12:15], null
+; GFX12-NEXT:    s_mov_b32 s8, s0
+; GFX12-NEXT:    s_mov_b32 s9, s1
+; GFX12-NEXT:    s_wait_loadcnt 0x1
+; GFX12-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX12-NEXT:    v_cmp_le_f16_e32 vcc_lo, v1, v0
+; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX12-NEXT:    v_cmp_le_f16_e32 vcc_lo, v3, v2
+; GFX12-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX12-NEXT:    buffer_store_b64 v[0:1], off, s[8:11], null
+; GFX12-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
@@ -1708,6 +2177,36 @@ define amdgpu_kernel void @fcmp_v2f16_gt(
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
 ; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[8:11], 0
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fcmp_v2f16_gt:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_clause 0x1
+; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT:    s_mov_b32 s10, -1
+; GFX12-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX12-NEXT:    s_mov_b32 s6, s10
+; GFX12-NEXT:    s_mov_b32 s7, s11
+; GFX12-NEXT:    s_mov_b32 s14, s10
+; GFX12-NEXT:    s_mov_b32 s15, s11
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_mov_b32 s13, s3
+; GFX12-NEXT:    buffer_load_b32 v0, off, s[4:7], null
+; GFX12-NEXT:    buffer_load_b32 v1, off, s[12:15], null
+; GFX12-NEXT:    s_mov_b32 s8, s0
+; GFX12-NEXT:    s_mov_b32 s9, s1
+; GFX12-NEXT:    s_wait_loadcnt 0x1
+; GFX12-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX12-NEXT:    v_cmp_gt_f16_e32 vcc_lo, v1, v0
+; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX12-NEXT:    v_cmp_gt_f16_e32 vcc_lo, v3, v2
+; GFX12-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX12-NEXT:    buffer_store_b64 v[0:1], off, s[8:11], null
+; GFX12-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
@@ -1811,6 +2310,36 @@ define amdgpu_kernel void @fcmp_v2f16_lg(
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
 ; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[8:11], 0
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fcmp_v2f16_lg:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_clause 0x1
+; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT:    s_mov_b32 s10, -1
+; GFX12-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX12-NEXT:    s_mov_b32 s6, s10
+; GFX12-NEXT:    s_mov_b32 s7, s11
+; GFX12-NEXT:    s_mov_b32 s14, s10
+; GFX12-NEXT:    s_mov_b32 s15, s11
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_mov_b32 s13, s3
+; GFX12-NEXT:    buffer_load_b32 v0, off, s[4:7], null
+; GFX12-NEXT:    buffer_load_b32 v1, off, s[12:15], null
+; GFX12-NEXT:    s_mov_b32 s8, s0
+; GFX12-NEXT:    s_mov_b32 s9, s1
+; GFX12-NEXT:    s_wait_loadcnt 0x1
+; GFX12-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX12-NEXT:    v_cmp_lg_f16_e32 vcc_lo, v1, v0
+; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX12-NEXT:    v_cmp_lg_f16_e32 vcc_lo, v3, v2
+; GFX12-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX12-NEXT:    buffer_store_b64 v[0:1], off, s[8:11], null
+; GFX12-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
@@ -1914,6 +2443,36 @@ define amdgpu_kernel void @fcmp_v2f16_ge(
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
 ; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[8:11], 0
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fcmp_v2f16_ge:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_clause 0x1
+; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT:    s_mov_b32 s10, -1
+; GFX12-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX12-NEXT:    s_mov_b32 s6, s10
+; GFX12-NEXT:    s_mov_b32 s7, s11
+; GFX12-NEXT:    s_mov_b32 s14, s10
+; GFX12-NEXT:    s_mov_b32 s15, s11
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_mov_b32 s13, s3
+; GFX12-NEXT:    buffer_load_b32 v0, off, s[4:7], null
+; GFX12-NEXT:    buffer_load_b32 v1, off, s[12:15], null
+; GFX12-NEXT:    s_mov_b32 s8, s0
+; GFX12-NEXT:    s_mov_b32 s9, s1
+; GFX12-NEXT:    s_wait_loadcnt 0x1
+; GFX12-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX12-NEXT:    v_cmp_ge_f16_e32 vcc_lo, v1, v0
+; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX12-NEXT:    v_cmp_ge_f16_e32 vcc_lo, v3, v2
+; GFX12-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX12-NEXT:    buffer_store_b64 v[0:1], off, s[8:11], null
+; GFX12-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
@@ -2017,6 +2576,36 @@ define amdgpu_kernel void @fcmp_v2f16_o(
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
 ; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[8:11], 0
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fcmp_v2f16_o:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_clause 0x1
+; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT:    s_mov_b32 s10, -1
+; GFX12-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX12-NEXT:    s_mov_b32 s6, s10
+; GFX12-NEXT:    s_mov_b32 s7, s11
+; GFX12-NEXT:    s_mov_b32 s14, s10
+; GFX12-NEXT:    s_mov_b32 s15, s11
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_mov_b32 s13, s3
+; GFX12-NEXT:    buffer_load_b32 v0, off, s[4:7], null
+; GFX12-NEXT:    buffer_load_b32 v1, off, s[12:15], null
+; GFX12-NEXT:    s_mov_b32 s8, s0
+; GFX12-NEXT:    s_mov_b32 s9, s1
+; GFX12-NEXT:    s_wait_loadcnt 0x1
+; GFX12-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX12-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v0
+; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX12-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v2
+; GFX12-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX12-NEXT:    buffer_store_b64 v[0:1], off, s[8:11], null
+; GFX12-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
@@ -2120,6 +2709,36 @@ define amdgpu_kernel void @fcmp_v2f16_u(
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
 ; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[8:11], 0
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fcmp_v2f16_u:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_clause 0x1
+; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT:    s_mov_b32 s10, -1
+; GFX12-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX12-NEXT:    s_mov_b32 s6, s10
+; GFX12-NEXT:    s_mov_b32 s7, s11
+; GFX12-NEXT:    s_mov_b32 s14, s10
+; GFX12-NEXT:    s_mov_b32 s15, s11
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_mov_b32 s13, s3
+; GFX12-NEXT:    buffer_load_b32 v0, off, s[4:7], null
+; GFX12-NEXT:    buffer_load_b32 v1, off, s[12:15], null
+; GFX12-NEXT:    s_mov_b32 s8, s0
+; GFX12-NEXT:    s_mov_b32 s9, s1
+; GFX12-NEXT:    s_wait_loadcnt 0x1
+; GFX12-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX12-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v0
+; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX12-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v2
+; GFX12-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX12-NEXT:    buffer_store_b64 v[0:1], off, s[8:11], null
+; GFX12-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
@@ -2222,6 +2841,36 @@ define amdgpu_kernel void @fcmp_v2f16_nge(
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
 ; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[8:11], 0
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fcmp_v2f16_nge:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_clause 0x1
+; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT:    s_mov_b32 s10, -1
+; GFX12-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX12-NEXT:    s_mov_b32 s6, s10
+; GFX12-NEXT:    s_mov_b32 s7, s11
+; GFX12-NEXT:    s_mov_b32 s14, s10
+; GFX12-NEXT:    s_mov_b32 s15, s11
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_mov_b32 s13, s3
+; GFX12-NEXT:    buffer_load_b32 v0, off, s[4:7], null
+; GFX12-NEXT:    buffer_load_b32 v1, off, s[12:15], null
+; GFX12-NEXT:    s_mov_b32 s8, s0
+; GFX12-NEXT:    s_mov_b32 s9, s1
+; GFX12-NEXT:    s_wait_loadcnt 0x1
+; GFX12-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX12-NEXT:    v_cmp_nge_f16_e32 vcc_lo, v1, v0
+; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX12-NEXT:    v_cmp_nge_f16_e32 vcc_lo, v3, v2
+; GFX12-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX12-NEXT:    buffer_store_b64 v[0:1], off, s[8:11], null
+; GFX12-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
@@ -2324,6 +2973,36 @@ define amdgpu_kernel void @fcmp_v2f16_nlg(
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
 ; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[8:11], 0
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fcmp_v2f16_nlg:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_clause 0x1
+; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT:    s_mov_b32 s10, -1
+; GFX12-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX12-NEXT:    s_mov_b32 s6, s10
+; GFX12-NEXT:    s_mov_b32 s7, s11
+; GFX12-NEXT:    s_mov_b32 s14, s10
+; GFX12-NEXT:    s_mov_b32 s15, s11
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_mov_b32 s13, s3
+; GFX12-NEXT:    buffer_load_b32 v0, off, s[4:7], null
+; GFX12-NEXT:    buffer_load_b32 v1, off, s[12:15], null
+; GFX12-NEXT:    s_mov_b32 s8, s0
+; GFX12-NEXT:    s_mov_b32 s9, s1
+; GFX12-NEXT:    s_wait_loadcnt 0x1
+; GFX12-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX12-NEXT:    v_cmp_nlg_f16_e32 vcc_lo, v1, v0
+; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX12-NEXT:    v_cmp_nlg_f16_e32 vcc_lo, v3, v2
+; GFX12-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX12-NEXT:    buffer_store_b64 v[0:1], off, s[8:11], null
+; GFX12-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
@@ -2427,6 +3106,36 @@ define amdgpu_kernel void @fcmp_v2f16_ngt(
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
 ; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[8:11], 0
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fcmp_v2f16_ngt:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_clause 0x1
+; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT:    s_mov_b32 s10, -1
+; GFX12-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX12-NEXT:    s_mov_b32 s6, s10
+; GFX12-NEXT:    s_mov_b32 s7, s11
+; GFX12-NEXT:    s_mov_b32 s14, s10
+; GFX12-NEXT:    s_mov_b32 s15, s11
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_mov_b32 s13, s3
+; GFX12-NEXT:    buffer_load_b32 v0, off, s[4:7], null
+; GFX12-NEXT:    buffer_load_b32 v1, off, s[12:15], null
+; GFX12-NEXT:    s_mov_b32 s8, s0
+; GFX12-NEXT:    s_mov_b32 s9, s1
+; GFX12-NEXT:    s_wait_loadcnt 0x1
+; GFX12-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX12-NEXT:    v_cmp_ngt_f16_e32 vcc_lo, v1, v0
+; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX12-NEXT:    v_cmp_ngt_f16_e32 vcc_lo, v3, v2
+; GFX12-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX12-NEXT:    buffer_store_b64 v[0:1], off, s[8:11], null
+; GFX12-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
@@ -2529,6 +3238,36 @@ define amdgpu_kernel void @fcmp_v2f16_nle(
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
 ; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[8:11], 0
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fcmp_v2f16_nle:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_clause 0x1
+; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT:    s_mov_b32 s10, -1
+; GFX12-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX12-NEXT:    s_mov_b32 s6, s10
+; GFX12-NEXT:    s_mov_b32 s7, s11
+; GFX12-NEXT:    s_mov_b32 s14, s10
+; GFX12-NEXT:    s_mov_b32 s15, s11
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_mov_b32 s13, s3
+; GFX12-NEXT:    buffer_load_b32 v0, off, s[4:7], null
+; GFX12-NEXT:    buffer_load_b32 v1, off, s[12:15], null
+; GFX12-NEXT:    s_mov_b32 s8, s0
+; GFX12-NEXT:    s_mov_b32 s9, s1
+; GFX12-NEXT:    s_wait_loadcnt 0x1
+; GFX12-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX12-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v1, v0
+; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX12-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v3, v2
+; GFX12-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX12-NEXT:    buffer_store_b64 v[0:1], off, s[8:11], null
+; GFX12-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
@@ -2631,6 +3370,36 @@ define amdgpu_kernel void @fcmp_v2f16_neq(
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
 ; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[8:11], 0
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fcmp_v2f16_neq:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_clause 0x1
+; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT:    s_mov_b32 s10, -1
+; GFX12-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX12-NEXT:    s_mov_b32 s6, s10
+; GFX12-NEXT:    s_mov_b32 s7, s11
+; GFX12-NEXT:    s_mov_b32 s14, s10
+; GFX12-NEXT:    s_mov_b32 s15, s11
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_mov_b32 s13, s3
+; GFX12-NEXT:    buffer_load_b32 v0, off, s[4:7], null
+; GFX12-NEXT:    buffer_load_b32 v1, off, s[12:15], null
+; GFX12-NEXT:    s_mov_b32 s8, s0
+; GFX12-NEXT:    s_mov_b32 s9, s1
+; GFX12-NEXT:    s_wait_loadcnt 0x1
+; GFX12-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX12-NEXT:    v_cmp_neq_f16_e32 vcc_lo, v1, v0
+; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX12-NEXT:    v_cmp_neq_f16_e32 vcc_lo, v3, v2
+; GFX12-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX12-NEXT:    buffer_store_b64 v[0:1], off, s[8:11], null
+; GFX12-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
@@ -2733,6 +3502,36 @@ define amdgpu_kernel void @fcmp_v2f16_nlt(
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
 ; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[8:11], 0
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fcmp_v2f16_nlt:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_clause 0x1
+; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT:    s_mov_b32 s10, -1
+; GFX12-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX12-NEXT:    s_mov_b32 s6, s10
+; GFX12-NEXT:    s_mov_b32 s7, s11
+; GFX12-NEXT:    s_mov_b32 s14, s10
+; GFX12-NEXT:    s_mov_b32 s15, s11
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_mov_b32 s13, s3
+; GFX12-NEXT:    buffer_load_b32 v0, off, s[4:7], null
+; GFX12-NEXT:    buffer_load_b32 v1, off, s[12:15], null
+; GFX12-NEXT:    s_mov_b32 s8, s0
+; GFX12-NEXT:    s_mov_b32 s9, s1
+; GFX12-NEXT:    s_wait_loadcnt 0x1
+; GFX12-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX12-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, v1, v0
+; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX12-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, v3, v2
+; GFX12-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX12-NEXT:    buffer_store_b64 v[0:1], off, s[8:11], null
+; GFX12-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
diff --git a/llvm/test/CodeGen/AMDGPU/fix-crash-valu-hazard.ll b/llvm/test/CodeGen/AMDGPU/fix-crash-valu-hazard.ll
new file mode 100644
index 0000000000000..87811968c7871
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fix-crash-valu-hazard.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -O0 < %s | FileCheck -check-prefix=GFX90A %s
+
+@G = addrspace(1) global <2 x i32> splat (i32 5)
+
+define amdgpu_ps void @global_load_lds_dword_saddr(ptr addrspace(1) inreg nocapture %gptr, ptr addrspace(3) nocapture %lptr) {
+; GFX942-LABEL: global_load_lds_dword_saddr:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    v_readfirstlane_b32 s2, v0
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    s_mov_b32 m0, s2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    global_load_lds_dword v2, s[0:1] offset:32 nt
+; GFX942-NEXT:    s_getpc_b64 s[0:1]
+; GFX942-NEXT:    s_add_u32 s0, s0, G@gotpcrel32@lo+4
+; GFX942-NEXT:    s_addc_u32 s1, s1, G@gotpcrel32@hi+12
+; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_mul_i32 s3, s3, 10
+; GFX942-NEXT:    s_mul_i32 s2, s2, 10
+; GFX942-NEXT:    v_mov_b32_e32 v0, s2
+; GFX942-NEXT:    v_mov_b32_e32 v1, s3
+; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT:    s_endpgm
+;
+; GFX90A-LABEL: global_load_lds_dword_saddr:
+; GFX90A:       ; %bb.0: ; %main_body
+; GFX90A-NEXT:    v_mov_b32_e32 v1, v0
+; GFX90A-NEXT:    s_mov_b32 s2, s0
+; GFX90A-NEXT:    ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3
+; GFX90A-NEXT:    s_mov_b32 s3, s1
+; GFX90A-NEXT:    ; kill: def $sgpr0_sgpr1 killed $sgpr2_sgpr3
+; GFX90A-NEXT:    s_getpc_b64 s[0:1]
+; GFX90A-NEXT:    s_add_u32 s0, s0, G@gotpcrel32@lo+4
+; GFX90A-NEXT:    s_addc_u32 s1, s1, G@gotpcrel32@hi+12
+; GFX90A-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x0
+; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    s_mov_b32 s4, s9
+; GFX90A-NEXT:    s_mov_b32 s6, 10
+; GFX90A-NEXT:    s_mul_i32 s4, s4, s6
+; GFX90A-NEXT:    s_mov_b32 s5, s8
+; GFX90A-NEXT:    s_mul_i32 s5, s5, s6
+; GFX90A-NEXT:    v_mov_b32_e32 v2, s5
+; GFX90A-NEXT:    v_mov_b32_e32 v4, s4
+; GFX90A-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
+; GFX90A-NEXT:    ; implicit-def: $sgpr4
+; GFX90A-NEXT:    v_readfirstlane_b32 s4, v1
+; GFX90A-NEXT:    s_mov_b32 m0, s4
+; GFX90A-NEXT:    s_nop 0
+; GFX90A-NEXT:    global_load_dword v0, s[2:3] offset:32 slc lds
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    global_store_dwordx2 v0, v[2:3], s[0:1]
+; GFX90A-NEXT:    s_endpgm
+main_body:
+  %LGV = load <2 x i32>, ptr addrspace(1) @G, align 8
+  %B = mul <2 x i32> %LGV, splat (i32 10)
+  call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 4, i32 32, i32 2)
+  store <2 x i32> %B, ptr addrspace(1) @G, align 8
+  ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/fix-vgpr-copies.mir b/llvm/test/CodeGen/AMDGPU/fix-vgpr-copies.mir
index 7886ea16e6742..bc1f5416507a9 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-vgpr-copies.mir
+++ b/llvm/test/CodeGen/AMDGPU/fix-vgpr-copies.mir
@@ -1,4 +1,6 @@
 # RUN: llc -mtriple=amdgcn -start-after=greedy -disable-copyprop -stop-after=si-optimize-exec-masking -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -passes=si-fix-vgpr-copies,si-optimize-exec-masking -o - %s | FileCheck %s
+
 # Check that we first do all vector instructions and only then change exec
 # CHECK-DAG:  COPY $vgpr10_vgpr11
 # CHECK-DAG:  COPY $vgpr12_vgpr13
diff --git a/llvm/test/CodeGen/AMDGPU/hazard-flat-instruction-valu-check.mir b/llvm/test/CodeGen/AMDGPU/hazard-flat-instruction-valu-check.mir
new file mode 100644
index 0000000000000..9a284258bc9f9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/hazard-flat-instruction-valu-check.mir
@@ -0,0 +1,20 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -run-pass=post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s
+
+---
+name:            test_flat_valu_hazard
+noVRegs:         true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GCN-LABEL: name: test_flat_valu_hazard
+    ; GCN: liveins: $vgpr0, $vgpr1
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: GLOBAL_LOAD_LDS_DWORD_SADDR killed $sgpr0_sgpr1, killed $vgpr0, 32, 2, implicit $m0, implicit $exec
+    ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec, implicit $exec
+    ; GCN-NEXT: FLAT_STORE_DWORDX2 killed renamable $vgpr2_vgpr3, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+    GLOBAL_LOAD_LDS_DWORD_SADDR killed $sgpr0_sgpr1, killed $vgpr0, 32, 2, implicit $m0, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec, implicit $exec
+    FLAT_STORE_DWORDX2 killed renamable $vgpr2_vgpr3, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+...
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index b2708cf13cbf3..a7cc4cfc6707f 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -224,6 +224,9 @@
 ; GCN-O1-NEXT:      Block Frequency Analysis
 ; GCN-O1-NEXT:      Constant Hoisting
 ; GCN-O1-NEXT:      Replace intrinsics with calls to vector library
+; GCN-O1-NEXT:      Lazy Branch Probability Analysis
+; GCN-O1-NEXT:      Lazy Block Frequency Analysis
+; GCN-O1-NEXT:      Optimization Remark Emitter
 ; GCN-O1-NEXT:      Partially inline calls to library functions
 ; GCN-O1-NEXT:      Instrument function entry/exit with calls to e.g. mcount() (post inlining)
 ; GCN-O1-NEXT:      Scalarize Masked Memory Intrinsics
@@ -518,6 +521,9 @@
 ; GCN-O1-OPTS-NEXT:      Block Frequency Analysis
 ; GCN-O1-OPTS-NEXT:      Constant Hoisting
 ; GCN-O1-OPTS-NEXT:      Replace intrinsics with calls to vector library
+; GCN-O1-OPTS-NEXT:      Lazy Branch Probability Analysis
+; GCN-O1-OPTS-NEXT:      Lazy Block Frequency Analysis
+; GCN-O1-OPTS-NEXT:      Optimization Remark Emitter
 ; GCN-O1-OPTS-NEXT:      Partially inline calls to library functions
 ; GCN-O1-OPTS-NEXT:      Instrument function entry/exit with calls to e.g. mcount() (post inlining)
 ; GCN-O1-OPTS-NEXT:      Scalarize Masked Memory Intrinsics
@@ -831,6 +837,9 @@
 ; GCN-O2-NEXT:      Block Frequency Analysis
 ; GCN-O2-NEXT:      Constant Hoisting
 ; GCN-O2-NEXT:      Replace intrinsics with calls to vector library
+; GCN-O2-NEXT:      Lazy Branch Probability Analysis
+; GCN-O2-NEXT:      Lazy Block Frequency Analysis
+; GCN-O2-NEXT:      Optimization Remark Emitter
 ; GCN-O2-NEXT:      Partially inline calls to library functions
 ; GCN-O2-NEXT:      Instrument function entry/exit with calls to e.g. mcount() (post inlining)
 ; GCN-O2-NEXT:      Scalarize Masked Memory Intrinsics
@@ -1152,6 +1161,9 @@
 ; GCN-O3-NEXT:      Block Frequency Analysis
 ; GCN-O3-NEXT:      Constant Hoisting
 ; GCN-O3-NEXT:      Replace intrinsics with calls to vector library
+; GCN-O3-NEXT:      Lazy Branch Probability Analysis
+; GCN-O3-NEXT:      Lazy Block Frequency Analysis
+; GCN-O3-NEXT:      Optimization Remark Emitter
 ; GCN-O3-NEXT:      Partially inline calls to library functions
 ; GCN-O3-NEXT:      Instrument function entry/exit with calls to e.g. mcount() (post inlining)
 ; GCN-O3-NEXT:      Scalarize Masked Memory Intrinsics
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll
index f416131e3d314..480d978fa530b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll
@@ -397,7 +397,7 @@ define i1 @posnormal_bf16(bfloat %x) nounwind {
 ; GFX10CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10CHECK-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
 ; GFX10CHECK-NEXT:    v_cmp_lt_i16_e32 vcc_lo, -1, v0
-; GFX10CHECK-NEXT:    v_add_nc_u16 v1, v1, 0xff80
+; GFX10CHECK-NEXT:    v_add_nc_u16 v1, 0xff80, v1
 ; GFX10CHECK-NEXT:    v_cmp_gt_u16_e64 s4, 0x7f00, v1
 ; GFX10CHECK-NEXT:    s_and_b32 s4, s4, vcc_lo
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
@@ -408,7 +408,7 @@ define i1 @posnormal_bf16(bfloat %x) nounwind {
 ; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11CHECK-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
 ; GFX11CHECK-NEXT:    v_cmp_lt_i16_e32 vcc_lo, -1, v0
-; GFX11CHECK-NEXT:    v_add_nc_u16 v1, v1, 0xff80
+; GFX11CHECK-NEXT:    v_add_nc_u16 v1, 0xff80, v1
 ; GFX11CHECK-NEXT:    v_cmp_gt_u16_e64 s0, 0x7f00, v1
 ; GFX11CHECK-NEXT:    s_and_b32 s0, s0, vcc_lo
 ; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
@@ -462,7 +462,7 @@ define i1 @negnormal_bf16(bfloat %x) nounwind {
 ; GFX10CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10CHECK-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
 ; GFX10CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0, v0
-; GFX10CHECK-NEXT:    v_add_nc_u16 v1, v1, 0xff80
+; GFX10CHECK-NEXT:    v_add_nc_u16 v1, 0xff80, v1
 ; GFX10CHECK-NEXT:    v_cmp_gt_u16_e64 s4, 0x7f00, v1
 ; GFX10CHECK-NEXT:    s_and_b32 s4, s4, vcc_lo
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
@@ -473,7 +473,7 @@ define i1 @negnormal_bf16(bfloat %x) nounwind {
 ; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11CHECK-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
 ; GFX11CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0, v0
-; GFX11CHECK-NEXT:    v_add_nc_u16 v1, v1, 0xff80
+; GFX11CHECK-NEXT:    v_add_nc_u16 v1, 0xff80, v1
 ; GFX11CHECK-NEXT:    v_cmp_gt_u16_e64 s0, 0x7f00, v1
 ; GFX11CHECK-NEXT:    s_and_b32 s0, s0, vcc_lo
 ; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
@@ -1348,7 +1348,7 @@ define i1 @isnormal_bf16(bfloat %x) {
 ; GFX10CHECK:       ; %bb.0:
 ; GFX10CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX10CHECK-NEXT:    v_add_nc_u16 v0, v0, 0xff80
+; GFX10CHECK-NEXT:    v_add_nc_u16 v0, 0xff80, v0
 ; GFX10CHECK-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x7f00, v0
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
@@ -1357,7 +1357,7 @@ define i1 @isnormal_bf16(bfloat %x) {
 ; GFX11CHECK:       ; %bb.0:
 ; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_add_nc_u16 v0, v0, 0xff80
+; GFX11CHECK-NEXT:    v_add_nc_u16 v0, 0xff80, v0
 ; GFX11CHECK-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x7f00, v0
 ; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
@@ -1402,7 +1402,7 @@ define i1 @not_isnormal_bf16(bfloat %x) {
 ; GFX10CHECK:       ; %bb.0:
 ; GFX10CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX10CHECK-NEXT:    v_add_nc_u16 v0, v0, 0xff80
+; GFX10CHECK-NEXT:    v_add_nc_u16 v0, 0xff80, v0
 ; GFX10CHECK-NEXT:    v_cmp_lt_u16_e32 vcc_lo, 0x7eff, v0
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
@@ -1411,7 +1411,7 @@ define i1 @not_isnormal_bf16(bfloat %x) {
 ; GFX11CHECK:       ; %bb.0:
 ; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_add_nc_u16 v0, v0, 0xff80
+; GFX11CHECK-NEXT:    v_add_nc_u16 v0, 0xff80, v0
 ; GFX11CHECK-NEXT:    v_cmp_lt_u16_e32 vcc_lo, 0x7eff, v0
 ; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
@@ -1464,7 +1464,7 @@ define i1 @not_is_plus_normal_bf16(bfloat %x) {
 ; GFX10CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10CHECK-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
 ; GFX10CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0, v0
-; GFX10CHECK-NEXT:    v_add_nc_u16 v1, v1, 0xff80
+; GFX10CHECK-NEXT:    v_add_nc_u16 v1, 0xff80, v1
 ; GFX10CHECK-NEXT:    v_cmp_lt_u16_e64 s4, 0x7eff, v1
 ; GFX10CHECK-NEXT:    s_or_b32 s4, s4, vcc_lo
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
@@ -1475,7 +1475,7 @@ define i1 @not_is_plus_normal_bf16(bfloat %x) {
 ; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11CHECK-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
 ; GFX11CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0, v0
-; GFX11CHECK-NEXT:    v_add_nc_u16 v1, v1, 0xff80
+; GFX11CHECK-NEXT:    v_add_nc_u16 v1, 0xff80, v1
 ; GFX11CHECK-NEXT:    v_cmp_lt_u16_e64 s0, 0x7eff, v1
 ; GFX11CHECK-NEXT:    s_or_b32 s0, s0, vcc_lo
 ; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
@@ -1529,7 +1529,7 @@ define i1 @not_is_neg_normal_bf16(bfloat %x) {
 ; GFX10CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10CHECK-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
 ; GFX10CHECK-NEXT:    v_cmp_lt_i16_e32 vcc_lo, -1, v0
-; GFX10CHECK-NEXT:    v_add_nc_u16 v1, v1, 0xff80
+; GFX10CHECK-NEXT:    v_add_nc_u16 v1, 0xff80, v1
 ; GFX10CHECK-NEXT:    v_cmp_lt_u16_e64 s4, 0x7eff, v1
 ; GFX10CHECK-NEXT:    s_or_b32 s4, s4, vcc_lo
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
@@ -1540,7 +1540,7 @@ define i1 @not_is_neg_normal_bf16(bfloat %x) {
 ; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11CHECK-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
 ; GFX11CHECK-NEXT:    v_cmp_lt_i16_e32 vcc_lo, -1, v0
-; GFX11CHECK-NEXT:    v_add_nc_u16 v1, v1, 0xff80
+; GFX11CHECK-NEXT:    v_add_nc_u16 v1, 0xff80, v1
 ; GFX11CHECK-NEXT:    v_cmp_lt_u16_e64 s0, 0x7eff, v1
 ; GFX11CHECK-NEXT:    s_or_b32 s0, s0, vcc_lo
 ; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
@@ -2569,7 +2569,7 @@ define i1 @not_iszero_or_qnan_bf16(bfloat %x) {
 ; GFX10CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7fc0, v0
 ; GFX10CHECK-NEXT:    v_cmp_lt_i16_e64 s4, 0x7f80, v0
 ; GFX10CHECK-NEXT:    v_cmp_eq_u16_e64 s5, 0x7f80, v0
-; GFX10CHECK-NEXT:    v_add_nc_u16 v0, v0, 0xff80
+; GFX10CHECK-NEXT:    v_add_nc_u16 v0, 0xff80, v0
 ; GFX10CHECK-NEXT:    v_cmp_gt_u16_e64 s6, 0x7f, v1
 ; GFX10CHECK-NEXT:    s_and_b32 s4, s4, vcc_lo
 ; GFX10CHECK-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x7f00, v0
@@ -2587,7 +2587,7 @@ define i1 @not_iszero_or_qnan_bf16(bfloat %x) {
 ; GFX11CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7fc0, v0
 ; GFX11CHECK-NEXT:    v_cmp_lt_i16_e64 s0, 0x7f80, v0
 ; GFX11CHECK-NEXT:    v_cmp_eq_u16_e64 s1, 0x7f80, v0
-; GFX11CHECK-NEXT:    v_add_nc_u16 v0, v0, 0xff80
+; GFX11CHECK-NEXT:    v_add_nc_u16 v0, 0xff80, v0
 ; GFX11CHECK-NEXT:    v_cmp_gt_u16_e64 s2, 0x7f, v1
 ; GFX11CHECK-NEXT:    s_and_b32 s0, s0, vcc_lo
 ; GFX11CHECK-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x7f00, v0
@@ -2669,7 +2669,7 @@ define i1 @not_iszero_or_snan_bf16(bfloat %x) {
 ; GFX10CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
 ; GFX10CHECK-NEXT:    v_add_nc_u16 v1, v0, -1
-; GFX10CHECK-NEXT:    v_add_nc_u16 v2, v0, 0xff80
+; GFX10CHECK-NEXT:    v_add_nc_u16 v2, 0xff80, v0
 ; GFX10CHECK-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
 ; GFX10CHECK-NEXT:    v_cmp_lt_i16_e64 s5, 0x7fbf, v0
 ; GFX10CHECK-NEXT:    v_cmp_gt_u16_e64 s4, 0x7f, v1
@@ -2685,7 +2685,7 @@ define i1 @not_iszero_or_snan_bf16(bfloat %x) {
 ; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
 ; GFX11CHECK-NEXT:    v_add_nc_u16 v1, v0, -1
-; GFX11CHECK-NEXT:    v_add_nc_u16 v2, v0, 0xff80
+; GFX11CHECK-NEXT:    v_add_nc_u16 v2, 0xff80, v0
 ; GFX11CHECK-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
 ; GFX11CHECK-NEXT:    v_cmp_lt_i16_e64 s1, 0x7fbf, v0
 ; GFX11CHECK-NEXT:    v_cmp_gt_u16_e64 s0, 0x7f, v1
diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll
index a2baa56ea0c98..f89341d539a0f 100644
--- a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll
+++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll
@@ -53,7 +53,7 @@
 ; CHECK-NEXT:      .cs:
 ; CHECK-NEXT:        .checksum_value: 0x9444d7d0
 ; CHECK-NEXT:        .debug_mode:     false
-; CHECK-NEXT:        .entry_point:    _amdgpu_cs_main
+; CHECK-NEXT:        .entry_point_symbol:    _amdgpu_cs_main
 ; CHECK-NEXT:        .excp_en:        0
 ; CHECK-NEXT:        .float_mode:     0xc0
 ; CHECK-NEXT:        .ieee_mode:      false
@@ -109,7 +109,7 @@
 ; CHECK-NEXT:        .wgp_mode:       false
 ; CHECK-NEXT:      .gs:
 ; CHECK-NEXT:        .debug_mode:     false
-; CHECK-NEXT:        .entry_point:    gs_shader
+; CHECK-NEXT:        .entry_point_symbol:    gs_shader
 ; CHECK-NEXT:        .ieee_mode:      false
 ; CHECK-NEXT:        .lds_size:       0x200
 ; CHECK-NEXT:        .mem_ordered:    true
@@ -120,7 +120,7 @@
 ; CHECK-NEXT:        .wgp_mode:       true
 ; CHECK-NEXT:      .hs:
 ; CHECK-NEXT:        .debug_mode:     false
-; CHECK-NEXT:        .entry_point:    hs_shader
+; CHECK-NEXT:        .entry_point_symbol:    hs_shader
 ; CHECK-NEXT:        .ieee_mode:      false
 ; CHECK-NEXT:        .lds_size:       0x1000
 ; CHECK-NEXT:        .mem_ordered:    true
@@ -131,7 +131,7 @@
 ; CHECK-NEXT:        .wgp_mode:       true
 ; CHECK-NEXT:      .ps:
 ; CHECK-NEXT:        .debug_mode:     false
-; CHECK-NEXT:        .entry_point:    ps_shader
+; CHECK-NEXT:        .entry_point_symbol:    ps_shader
 ; CHECK-NEXT:        .ieee_mode:      false
 ; CHECK-NEXT:        .lds_size:       0
 ; CHECK-NEXT:        .mem_ordered:    true
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
index 7e3634fdf4ebb..01528cdf7c125 100644
--- a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
@@ -1329,7 +1329,7 @@ define amdgpu_kernel void @v_test_i16_x_sub_64(ptr addrspace(1) %out, ptr addrsp
 ; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    global_load_ushort v1, v0, s[2:3]
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, v1, 0xffc0
+; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, 0xffc0, v1
 ; GFX10-GISEL-NEXT:    global_store_short v0, v1, s[0:1]
 ; GFX10-GISEL-NEXT:    s_endpgm
 ;
@@ -1368,7 +1368,7 @@ define amdgpu_kernel void @v_test_i16_x_sub_64(ptr addrspace(1) %out, ptr addrsp
 ; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-GISEL-TRUE16-NEXT:    global_load_u16 v1, v0, s[2:3]
 ; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-GISEL-TRUE16-NEXT:    v_add_nc_u16 v1.l, v1.l, 0xffc0
+; GFX11-GISEL-TRUE16-NEXT:    v_add_nc_u16 v1.l, 0xffc0, v1.l
 ; GFX11-GISEL-TRUE16-NEXT:    global_store_b16 v0, v1, s[0:1]
 ; GFX11-GISEL-TRUE16-NEXT:    s_endpgm
 ;
@@ -1381,7 +1381,7 @@ define amdgpu_kernel void @v_test_i16_x_sub_64(ptr addrspace(1) %out, ptr addrsp
 ; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-GISEL-FAKE16-NEXT:    global_load_u16 v1, v0, s[2:3]
 ; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-GISEL-FAKE16-NEXT:    v_add_nc_u16 v1, v1, 0xffc0
+; GFX11-GISEL-FAKE16-NEXT:    v_add_nc_u16 v1, 0xffc0, v1
 ; GFX11-GISEL-FAKE16-NEXT:    global_store_b16 v0, v1, s[0:1]
 ; GFX11-GISEL-FAKE16-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1514,7 +1514,7 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_zext_to_i32(ptr addrspace(1) %out
 ; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    global_load_ushort v1, v1, s[2:3]
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, v1, 0xffc0
+; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, 0xffc0, v1
 ; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX10-GISEL-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX10-GISEL-NEXT:    s_endpgm
@@ -1561,7 +1561,7 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_zext_to_i32(ptr addrspace(1) %out
 ; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-GISEL-TRUE16-NEXT:    global_load_u16 v0, v0, s[2:3]
 ; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-GISEL-TRUE16-NEXT:    v_add_nc_u16 v0.l, v0.l, 0xffc0
+; GFX11-GISEL-TRUE16-NEXT:    v_add_nc_u16 v0.l, 0xffc0, v0.l
 ; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-GISEL-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX11-GISEL-TRUE16-NEXT:    global_store_b32 v1, v0, s[0:1]
@@ -1577,7 +1577,7 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_zext_to_i32(ptr addrspace(1) %out
 ; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-GISEL-FAKE16-NEXT:    global_load_u16 v1, v1, s[2:3]
 ; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-GISEL-FAKE16-NEXT:    v_add_nc_u16 v1, v1, 0xffc0
+; GFX11-GISEL-FAKE16-NEXT:    v_add_nc_u16 v1, 0xffc0, v1
 ; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-GISEL-FAKE16-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX11-GISEL-FAKE16-NEXT:    global_store_b32 v0, v1, s[0:1]
@@ -1746,8 +1746,8 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_multi_use(ptr addrspace(1) %out,
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-GISEL-NEXT:    global_load_ushort v2, v0, s[2:3] glc dlc
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, v1, 0xffc0
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v2, v2, 0xffc0
+; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, 0xffc0, v1
+; GFX10-GISEL-NEXT:    v_add_nc_u16 v2, 0xffc0, v2
 ; GFX10-GISEL-NEXT:    global_store_short v0, v1, s[0:1]
 ; GFX10-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-GISEL-NEXT:    global_store_short v0, v2, s[0:1]
@@ -1808,8 +1808,8 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_multi_use(ptr addrspace(1) %out,
 ; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-GISEL-TRUE16-NEXT:    global_load_u16 v2, v0, s[2:3] glc dlc
 ; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-GISEL-TRUE16-NEXT:    v_add_nc_u16 v1.l, v1.l, 0xffc0
-; GFX11-GISEL-TRUE16-NEXT:    v_add_nc_u16 v2.l, v2.l, 0xffc0
+; GFX11-GISEL-TRUE16-NEXT:    v_add_nc_u16 v1.l, 0xffc0, v1.l
+; GFX11-GISEL-TRUE16-NEXT:    v_add_nc_u16 v2.l, 0xffc0, v2.l
 ; GFX11-GISEL-TRUE16-NEXT:    global_store_b16 v0, v1, s[0:1] dlc
 ; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-GISEL-TRUE16-NEXT:    global_store_b16 v0, v2, s[0:1] dlc
@@ -1827,8 +1827,8 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_multi_use(ptr addrspace(1) %out,
 ; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-GISEL-FAKE16-NEXT:    global_load_u16 v2, v0, s[2:3] glc dlc
 ; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-GISEL-FAKE16-NEXT:    v_add_nc_u16 v1, v1, 0xffc0
-; GFX11-GISEL-FAKE16-NEXT:    v_add_nc_u16 v2, v2, 0xffc0
+; GFX11-GISEL-FAKE16-NEXT:    v_add_nc_u16 v1, 0xffc0, v1
+; GFX11-GISEL-FAKE16-NEXT:    v_add_nc_u16 v2, 0xffc0, v2
 ; GFX11-GISEL-FAKE16-NEXT:    global_store_b16 v0, v1, s[0:1] dlc
 ; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-GISEL-FAKE16-NEXT:    global_store_b16 v0, v2, s[0:1] dlc
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll
index b87c969c5bbdf..1851a34d0e560 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll
@@ -61,13 +61,10 @@ define void @v_shuffle_v4i64_v3i64__1_u_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__1_u_u_u:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX900-NEXT:    ;;#ASMSTART
 ; GFX900-NEXT:    ; def v[0:5]
 ; GFX900-NEXT:    ;;#ASMEND
-; GFX900-NEXT:    v_mov_b32_e32 v6, 0
-; GFX900-NEXT:    v_mov_b32_e32 v4, v2
-; GFX900-NEXT:    v_mov_b32_e32 v5, v3
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
 ; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
@@ -75,13 +72,10 @@ define void @v_shuffle_v4i64_v3i64__1_u_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__1_u_u_u:
 ; GFX90A:       ; %bb.0:
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def v[0:5]
 ; GFX90A-NEXT:    ;;#ASMEND
-; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
-; GFX90A-NEXT:    v_mov_b32_e32 v4, v2
-; GFX90A-NEXT:    v_mov_b32_e32 v5, v3
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
 ; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
@@ -89,13 +83,10 @@ define void @v_shuffle_v4i64_v3i64__1_u_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_u_u_u:
 ; GFX940:       ; %bb.0:
 ; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX940-NEXT:    ;;#ASMSTART
 ; GFX940-NEXT:    ; def v[0:5]
 ; GFX940-NEXT:    ;;#ASMEND
-; GFX940-NEXT:    v_mov_b32_e32 v6, 0
-; GFX940-NEXT:    v_mov_b32_e32 v4, v2
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
 ; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
@@ -113,10 +104,9 @@ define void @v_shuffle_v4i64_v3i64__2_u_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX900-NEXT:    ; def v[0:5]
 ; GFX900-NEXT:    ;;#ASMEND
 ; GFX900-NEXT:    v_mov_b32_e32 v6, 0
-; GFX900-NEXT:    v_mov_b32_e32 v2, v4
-; GFX900-NEXT:    v_mov_b32_e32 v3, v5
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT:    v_mov_b32_e32 v0, v4
+; GFX900-NEXT:    v_mov_b32_e32 v1, v5
+; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -127,10 +117,9 @@ define void @v_shuffle_v4i64_v3i64__2_u_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX90A-NEXT:    ; def v[0:5]
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
-; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
-; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
+; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
+; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -141,10 +130,9 @@ define void @v_shuffle_v4i64_v3i64__2_u_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX940-NEXT:    ; def v[0:5]
 ; GFX940-NEXT:    ;;#ASMEND
 ; GFX940-NEXT:    v_mov_b32_e32 v6, 0
-; GFX940-NEXT:    v_mov_b32_e32 v2, v4
-; GFX940-NEXT:    v_mov_b32_e32 v3, v5
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
+; GFX940-NEXT:    v_mov_b32_e32 v0, v4
+; GFX940-NEXT:    v_mov_b32_e32 v1, v5
+; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -168,13 +156,10 @@ define void @v_shuffle_v4i64_v3i64__4_u_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__4_u_u_u:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX900-NEXT:    ;;#ASMSTART
 ; GFX900-NEXT:    ; def v[0:5]
 ; GFX900-NEXT:    ;;#ASMEND
-; GFX900-NEXT:    v_mov_b32_e32 v6, 0
-; GFX900-NEXT:    v_mov_b32_e32 v4, v2
-; GFX900-NEXT:    v_mov_b32_e32 v5, v3
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
 ; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
@@ -182,13 +167,10 @@ define void @v_shuffle_v4i64_v3i64__4_u_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__4_u_u_u:
 ; GFX90A:       ; %bb.0:
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def v[0:5]
 ; GFX90A-NEXT:    ;;#ASMEND
-; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
-; GFX90A-NEXT:    v_mov_b32_e32 v4, v2
-; GFX90A-NEXT:    v_mov_b32_e32 v5, v3
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
 ; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
@@ -196,13 +178,10 @@ define void @v_shuffle_v4i64_v3i64__4_u_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__4_u_u_u:
 ; GFX940:       ; %bb.0:
 ; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX940-NEXT:    ;;#ASMSTART
 ; GFX940-NEXT:    ; def v[0:5]
 ; GFX940-NEXT:    ;;#ASMEND
-; GFX940-NEXT:    v_mov_b32_e32 v6, 0
-; GFX940-NEXT:    v_mov_b32_e32 v4, v2
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
 ; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
@@ -221,10 +200,9 @@ define void @v_shuffle_v4i64_v3i64__5_u_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX900-NEXT:    ; def v[0:5]
 ; GFX900-NEXT:    ;;#ASMEND
 ; GFX900-NEXT:    v_mov_b32_e32 v6, 0
-; GFX900-NEXT:    v_mov_b32_e32 v2, v4
-; GFX900-NEXT:    v_mov_b32_e32 v3, v5
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT:    v_mov_b32_e32 v0, v4
+; GFX900-NEXT:    v_mov_b32_e32 v1, v5
+; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -235,10 +213,9 @@ define void @v_shuffle_v4i64_v3i64__5_u_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX90A-NEXT:    ; def v[0:5]
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
-; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
-; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
+; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
+; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -249,10 +226,9 @@ define void @v_shuffle_v4i64_v3i64__5_u_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX940-NEXT:    ; def v[0:5]
 ; GFX940-NEXT:    ;;#ASMEND
 ; GFX940-NEXT:    v_mov_b32_e32 v6, 0
-; GFX940-NEXT:    v_mov_b32_e32 v2, v4
-; GFX940-NEXT:    v_mov_b32_e32 v3, v5
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
+; GFX940-NEXT:    v_mov_b32_e32 v0, v4
+; GFX940-NEXT:    v_mov_b32_e32 v1, v5
+; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -544,7 +520,6 @@ define void @v_shuffle_v4i64_v3i64__5_5_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX900-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX900-NEXT:    v_mov_b32_e32 v2, v4
 ; GFX900-NEXT:    v_mov_b32_e32 v3, v5
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
 ; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
@@ -558,7 +533,6 @@ define void @v_shuffle_v4i64_v3i64__5_5_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
 ; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
 ; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
@@ -572,7 +546,6 @@ define void @v_shuffle_v4i64_v3i64__5_5_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX940-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX940-NEXT:    v_mov_b32_e32 v2, v4
 ; GFX940-NEXT:    v_mov_b32_e32 v3, v5
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
 ; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
@@ -875,9 +848,12 @@ define void @v_shuffle_v4i64_v3i64__5_5_5_u(ptr addrspace(1) inreg %ptr) {
 ; GFX900-NEXT:    ; def v[0:5]
 ; GFX900-NEXT:    ;;#ASMEND
 ; GFX900-NEXT:    v_mov_b32_e32 v6, 0
+; GFX900-NEXT:    v_mov_b32_e32 v0, v4
+; GFX900-NEXT:    v_mov_b32_e32 v1, v5
+; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX900-NEXT:    s_nop 0
 ; GFX900-NEXT:    v_mov_b32_e32 v2, v4
 ; GFX900-NEXT:    v_mov_b32_e32 v3, v5
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
 ; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
@@ -889,9 +865,12 @@ define void @v_shuffle_v4i64_v3i64__5_5_5_u(ptr addrspace(1) inreg %ptr) {
 ; GFX90A-NEXT:    ; def v[0:5]
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
+; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
+; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
+; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT:    s_nop 0
 ; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
 ; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
 ; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
@@ -903,9 +882,12 @@ define void @v_shuffle_v4i64_v3i64__5_5_5_u(ptr addrspace(1) inreg %ptr) {
 ; GFX940-NEXT:    ; def v[0:5]
 ; GFX940-NEXT:    ;;#ASMEND
 ; GFX940-NEXT:    v_mov_b32_e32 v6, 0
+; GFX940-NEXT:    v_mov_b32_e32 v0, v4
+; GFX940-NEXT:    v_mov_b32_e32 v1, v5
+; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
+; GFX940-NEXT:    s_nop 1
 ; GFX940-NEXT:    v_mov_b32_e32 v2, v4
 ; GFX940-NEXT:    v_mov_b32_e32 v3, v5
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
 ; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
@@ -2449,7 +2431,7 @@ define void @v_shuffle_v4i64_v3i64__u_1_1_1(ptr addrspace(1) inreg %ptr) {
 ; GFX900-NEXT:    v_mov_b32_e32 v4, v2
 ; GFX900-NEXT:    v_mov_b32_e32 v5, v3
 ; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2463,7 +2445,7 @@ define void @v_shuffle_v4i64_v3i64__u_1_1_1(ptr addrspace(1) inreg %ptr) {
 ; GFX90A-NEXT:    v_mov_b32_e32 v4, v2
 ; GFX90A-NEXT:    v_mov_b32_e32 v5, v3
 ; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2477,7 +2459,7 @@ define void @v_shuffle_v4i64_v3i64__u_1_1_1(ptr addrspace(1) inreg %ptr) {
 ; GFX940-NEXT:    v_mov_b32_e32 v4, v2
 ; GFX940-NEXT:    v_mov_b32_e32 v5, v3
 ; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
+; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -2650,7 +2632,7 @@ define void @v_shuffle_v4i64_v3i64__3_1_1_1(ptr addrspace(1) inreg %ptr) {
 ; GFX900-NEXT:    v_mov_b32_e32 v4, v2
 ; GFX900-NEXT:    v_mov_b32_e32 v5, v3
 ; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2664,7 +2646,7 @@ define void @v_shuffle_v4i64_v3i64__3_1_1_1(ptr addrspace(1) inreg %ptr) {
 ; GFX90A-NEXT:    v_mov_b32_e32 v4, v2
 ; GFX90A-NEXT:    v_mov_b32_e32 v5, v3
 ; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2678,7 +2660,7 @@ define void @v_shuffle_v4i64_v3i64__3_1_1_1(ptr addrspace(1) inreg %ptr) {
 ; GFX940-NEXT:    v_mov_b32_e32 v4, v2
 ; GFX940-NEXT:    v_mov_b32_e32 v5, v3
 ; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
+; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -4691,13 +4673,10 @@ define void @v_shuffle_v4i64_v3i64__1_3_3_3(ptr addrspace(1) inreg %ptr) {
 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__1_3_3_3:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX900-NEXT:    ;;#ASMSTART
 ; GFX900-NEXT:    ; def v[0:5]
 ; GFX900-NEXT:    ;;#ASMEND
-; GFX900-NEXT:    v_mov_b32_e32 v6, 0
-; GFX900-NEXT:    v_mov_b32_e32 v4, v2
-; GFX900-NEXT:    v_mov_b32_e32 v5, v3
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
 ; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
@@ -4705,13 +4684,10 @@ define void @v_shuffle_v4i64_v3i64__1_3_3_3(ptr addrspace(1) inreg %ptr) {
 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__1_3_3_3:
 ; GFX90A:       ; %bb.0:
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def v[0:5]
 ; GFX90A-NEXT:    ;;#ASMEND
-; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
-; GFX90A-NEXT:    v_mov_b32_e32 v4, v2
-; GFX90A-NEXT:    v_mov_b32_e32 v5, v3
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
 ; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
@@ -4719,13 +4695,10 @@ define void @v_shuffle_v4i64_v3i64__1_3_3_3(ptr addrspace(1) inreg %ptr) {
 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_3_3_3:
 ; GFX940:       ; %bb.0:
 ; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX940-NEXT:    ;;#ASMSTART
 ; GFX940-NEXT:    ; def v[0:5]
 ; GFX940-NEXT:    ;;#ASMEND
-; GFX940-NEXT:    v_mov_b32_e32 v6, 0
-; GFX940-NEXT:    v_mov_b32_e32 v4, v2
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
 ; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
@@ -4743,10 +4716,9 @@ define void @v_shuffle_v4i64_v3i64__2_3_3_3(ptr addrspace(1) inreg %ptr) {
 ; GFX900-NEXT:    ; def v[0:5]
 ; GFX900-NEXT:    ;;#ASMEND
 ; GFX900-NEXT:    v_mov_b32_e32 v6, 0
-; GFX900-NEXT:    v_mov_b32_e32 v2, v4
-; GFX900-NEXT:    v_mov_b32_e32 v3, v5
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT:    v_mov_b32_e32 v0, v4
+; GFX900-NEXT:    v_mov_b32_e32 v1, v5
+; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4757,10 +4729,9 @@ define void @v_shuffle_v4i64_v3i64__2_3_3_3(ptr addrspace(1) inreg %ptr) {
 ; GFX90A-NEXT:    ; def v[0:5]
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
-; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
-; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
+; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
+; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4771,10 +4742,9 @@ define void @v_shuffle_v4i64_v3i64__2_3_3_3(ptr addrspace(1) inreg %ptr) {
 ; GFX940-NEXT:    ; def v[0:5]
 ; GFX940-NEXT:    ;;#ASMEND
 ; GFX940-NEXT:    v_mov_b32_e32 v6, 0
-; GFX940-NEXT:    v_mov_b32_e32 v2, v4
-; GFX940-NEXT:    v_mov_b32_e32 v3, v5
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
+; GFX940-NEXT:    v_mov_b32_e32 v0, v4
+; GFX940-NEXT:    v_mov_b32_e32 v1, v5
+; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -5609,7 +5579,7 @@ define void @v_shuffle_v4i64_v3i64__u_4_4_4(ptr addrspace(1) inreg %ptr) {
 ; GFX900-NEXT:    v_mov_b32_e32 v4, v2
 ; GFX900-NEXT:    v_mov_b32_e32 v5, v3
 ; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -5623,7 +5593,7 @@ define void @v_shuffle_v4i64_v3i64__u_4_4_4(ptr addrspace(1) inreg %ptr) {
 ; GFX90A-NEXT:    v_mov_b32_e32 v4, v2
 ; GFX90A-NEXT:    v_mov_b32_e32 v5, v3
 ; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -5637,7 +5607,7 @@ define void @v_shuffle_v4i64_v3i64__u_4_4_4(ptr addrspace(1) inreg %ptr) {
 ; GFX940-NEXT:    v_mov_b32_e32 v4, v2
 ; GFX940-NEXT:    v_mov_b32_e32 v5, v3
 ; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
+; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -7046,8 +7016,10 @@ define void @v_shuffle_v4i64_v3i64__5_u_5_5(ptr addrspace(1) inreg %ptr) {
 ; GFX900-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX900-NEXT:    v_mov_b32_e32 v2, v4
 ; GFX900-NEXT:    v_mov_b32_e32 v3, v5
+; GFX900-NEXT:    v_mov_b32_e32 v0, v4
+; GFX900-NEXT:    v_mov_b32_e32 v1, v5
 ; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -7060,8 +7032,10 @@ define void @v_shuffle_v4i64_v3i64__5_u_5_5(ptr addrspace(1) inreg %ptr) {
 ; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
 ; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
+; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
+; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
 ; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -7074,8 +7048,10 @@ define void @v_shuffle_v4i64_v3i64__5_u_5_5(ptr addrspace(1) inreg %ptr) {
 ; GFX940-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX940-NEXT:    v_mov_b32_e32 v2, v4
 ; GFX940-NEXT:    v_mov_b32_e32 v3, v5
+; GFX940-NEXT:    v_mov_b32_e32 v0, v4
+; GFX940-NEXT:    v_mov_b32_e32 v1, v5
 ; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
+; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -7408,13 +7384,14 @@ define void @v_shuffle_v4i64_v3i64__5_5_u_5(ptr addrspace(1) inreg %ptr) {
 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_u_5:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX900-NEXT:    ;;#ASMSTART
 ; GFX900-NEXT:    ; def v[0:5]
 ; GFX900-NEXT:    ;;#ASMEND
-; GFX900-NEXT:    v_mov_b32_e32 v6, 0
+; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT:    s_nop 0
 ; GFX900-NEXT:    v_mov_b32_e32 v2, v4
 ; GFX900-NEXT:    v_mov_b32_e32 v3, v5
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
 ; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
@@ -7422,13 +7399,14 @@ define void @v_shuffle_v4i64_v3i64__5_5_u_5(ptr addrspace(1) inreg %ptr) {
 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_u_5:
 ; GFX90A:       ; %bb.0:
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def v[0:5]
 ; GFX90A-NEXT:    ;;#ASMEND
-; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
+; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT:    s_nop 0
 ; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
 ; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
 ; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
@@ -7436,13 +7414,14 @@ define void @v_shuffle_v4i64_v3i64__5_5_u_5(ptr addrspace(1) inreg %ptr) {
 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_u_5:
 ; GFX940:       ; %bb.0:
 ; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX940-NEXT:    ;;#ASMSTART
 ; GFX940-NEXT:    ; def v[0:5]
 ; GFX940-NEXT:    ;;#ASMEND
-; GFX940-NEXT:    v_mov_b32_e32 v6, 0
+; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
+; GFX940-NEXT:    s_nop 1
 ; GFX940-NEXT:    v_mov_b32_e32 v2, v4
 ; GFX940-NEXT:    v_mov_b32_e32 v3, v5
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
 ; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
@@ -7807,22 +7786,44 @@ define void @s_shuffle_v4i64_v3i64__0_u_u_u() {
 }
 
 define void @s_shuffle_v4i64_v3i64__1_u_u_u() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__1_u_u_u:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; def s[8:13]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s8, s10
-; GFX9-NEXT:    s_mov_b32 s9, s11
-; GFX9-NEXT:    s_mov_b32 s12, s10
-; GFX9-NEXT:    s_mov_b32 s13, s11
-; GFX9-NEXT:    s_mov_b32 s14, s10
-; GFX9-NEXT:    s_mov_b32 s15, s11
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; use s[8:15]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__1_u_u_u:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; def s[4:9]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_mov_b32 s8, s6
+; GFX900-NEXT:    s_mov_b32 s9, s7
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; use s[8:15]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_u_u_u:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def s[4:9]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_mov_b32 s8, s6
+; GFX90A-NEXT:    s_mov_b32 s9, s7
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; use s[8:15]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: s_shuffle_v4i64_v3i64__1_u_u_u:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; def s[0:5]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_mov_b32 s8, s2
+; GFX940-NEXT:    s_mov_b32 s9, s3
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; use s[8:15]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x i64> asm "; def $0", "=s"()
   %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
   call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -7830,59 +7831,99 @@ define void @s_shuffle_v4i64_v3i64__1_u_u_u() {
 }
 
 define void @s_shuffle_v4i64_v3i64__2_u_u_u() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__2_u_u_u:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; def s[8:13]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s8, s12
-; GFX9-NEXT:    s_mov_b32 s9, s13
-; GFX9-NEXT:    s_mov_b32 s10, s12
-; GFX9-NEXT:    s_mov_b32 s11, s13
-; GFX9-NEXT:    s_mov_b32 s14, s12
-; GFX9-NEXT:    s_mov_b32 s15, s13
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; use s[8:15]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-  %vec0 = call <3 x i64> asm "; def $0", "=s"()
-  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
-  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
-  ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__3_u_u_u() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_u_u_u:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; use s[8:15]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-  %vec0 = call <3 x i64> asm "; def $0", "=s"()
-  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
-  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
-  ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__4_u_u_u() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__4_u_u_u:
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__2_u_u_u:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; def s[4:9]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; use s[8:15]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__2_u_u_u:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def s[4:9]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; use s[8:15]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: s_shuffle_v4i64_v3i64__2_u_u_u:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; def s[0:5]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_mov_b32 s8, s4
+; GFX940-NEXT:    s_mov_b32 s9, s5
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; use s[8:15]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+  %vec0 = call <3 x i64> asm "; def $0", "=s"()
+  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
+  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+  ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__3_u_u_u() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_u_u_u:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; def s[8:13]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s8, s10
-; GFX9-NEXT:    s_mov_b32 s9, s11
-; GFX9-NEXT:    s_mov_b32 s12, s10
-; GFX9-NEXT:    s_mov_b32 s13, s11
-; GFX9-NEXT:    s_mov_b32 s14, s10
-; GFX9-NEXT:    s_mov_b32 s15, s11
-; GFX9-NEXT:    ;;#ASMSTART
 ; GFX9-NEXT:    ; use s[8:15]
 ; GFX9-NEXT:    ;;#ASMEND
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %vec0 = call <3 x i64> asm "; def $0", "=s"()
+  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
+  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+  ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__4_u_u_u() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_u_u_u:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; def s[4:9]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_mov_b32 s8, s6
+; GFX900-NEXT:    s_mov_b32 s9, s7
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; use s[8:15]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_u_u_u:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def s[4:9]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_mov_b32 s8, s6
+; GFX90A-NEXT:    s_mov_b32 s9, s7
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; use s[8:15]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: s_shuffle_v4i64_v3i64__4_u_u_u:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; def s[0:5]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_mov_b32 s8, s2
+; GFX940-NEXT:    s_mov_b32 s9, s3
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; use s[8:15]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x i64> asm "; def $0", "=s"()
   %vec1 = call <3 x i64> asm "; def $0", "=s"()
   %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 poison, i32 poison, i32 poison>
@@ -7891,22 +7932,40 @@ define void @s_shuffle_v4i64_v3i64__4_u_u_u() {
 }
 
 define void @s_shuffle_v4i64_v3i64__5_u_u_u() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_u_u_u:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; def s[8:13]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s8, s12
-; GFX9-NEXT:    s_mov_b32 s9, s13
-; GFX9-NEXT:    s_mov_b32 s10, s12
-; GFX9-NEXT:    s_mov_b32 s11, s13
-; GFX9-NEXT:    s_mov_b32 s14, s12
-; GFX9-NEXT:    s_mov_b32 s15, s13
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; use s[8:15]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_u_u:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; def s[4:9]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; use s[8:15]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_u_u:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def s[4:9]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; use s[8:15]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_u_u_u:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; def s[0:5]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_mov_b32 s8, s4
+; GFX940-NEXT:    s_mov_b32 s9, s5
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; use s[8:15]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x i64> asm "; def $0", "=s"()
   %vec1 = call <3 x i64> asm "; def $0", "=s"()
   %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 poison, i32 poison>
@@ -8150,22 +8209,50 @@ define void @s_shuffle_v4i64_v3i64__5_4_u_u() {
 }
 
 define void @s_shuffle_v4i64_v3i64__5_5_u_u() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_u_u:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; def s[8:13]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s8, s12
-; GFX9-NEXT:    s_mov_b32 s9, s13
-; GFX9-NEXT:    s_mov_b32 s10, s12
-; GFX9-NEXT:    s_mov_b32 s11, s13
-; GFX9-NEXT:    s_mov_b32 s14, s12
-; GFX9-NEXT:    s_mov_b32 s15, s13
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; use s[8:15]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_u:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; def s[8:13]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_mov_b32 s8, s12
+; GFX900-NEXT:    s_mov_b32 s9, s13
+; GFX900-NEXT:    s_mov_b32 s10, s12
+; GFX900-NEXT:    s_mov_b32 s11, s13
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; use s[8:15]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_u:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def s[8:13]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_mov_b32 s8, s12
+; GFX90A-NEXT:    s_mov_b32 s9, s13
+; GFX90A-NEXT:    s_mov_b32 s10, s12
+; GFX90A-NEXT:    s_mov_b32 s11, s13
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; use s[8:15]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_u:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; def s[0:5]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_mov_b32 s8, s4
+; GFX940-NEXT:    s_mov_b32 s9, s5
+; GFX940-NEXT:    s_mov_b32 s10, s4
+; GFX940-NEXT:    s_mov_b32 s11, s5
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; use s[8:15]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x i64> asm "; def $0", "=s"()
   %vec1 = call <3 x i64> asm "; def $0", "=s"()
   %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 poison>
@@ -8491,8 +8578,6 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_u() {
 ; GFX9-NEXT:    s_mov_b32 s9, s13
 ; GFX9-NEXT:    s_mov_b32 s10, s12
 ; GFX9-NEXT:    s_mov_b32 s11, s13
-; GFX9-NEXT:    s_mov_b32 s14, s12
-; GFX9-NEXT:    s_mov_b32 s15, s13
 ; GFX9-NEXT:    ;;#ASMSTART
 ; GFX9-NEXT:    ; use s[8:15]
 ; GFX9-NEXT:    ;;#ASMEND
@@ -8854,22 +8939,56 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_5() {
 }
 
 define void @s_shuffle_v4i64_v3i64__u_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_0_0_0:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; def s[8:13]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s10, s8
-; GFX9-NEXT:    s_mov_b32 s11, s9
-; GFX9-NEXT:    s_mov_b32 s12, s8
-; GFX9-NEXT:    s_mov_b32 s13, s9
-; GFX9-NEXT:    s_mov_b32 s14, s8
-; GFX9-NEXT:    s_mov_b32 s15, s9
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; use s[8:15]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__u_0_0_0:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; def s[4:9]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_mov_b32 s10, s4
+; GFX900-NEXT:    s_mov_b32 s11, s5
+; GFX900-NEXT:    s_mov_b32 s12, s4
+; GFX900-NEXT:    s_mov_b32 s13, s5
+; GFX900-NEXT:    s_mov_b32 s14, s4
+; GFX900-NEXT:    s_mov_b32 s15, s5
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; use s[8:15]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__u_0_0_0:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def s[4:9]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_mov_b32 s10, s4
+; GFX90A-NEXT:    s_mov_b32 s11, s5
+; GFX90A-NEXT:    s_mov_b32 s12, s4
+; GFX90A-NEXT:    s_mov_b32 s13, s5
+; GFX90A-NEXT:    s_mov_b32 s14, s4
+; GFX90A-NEXT:    s_mov_b32 s15, s5
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; use s[8:15]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: s_shuffle_v4i64_v3i64__u_0_0_0:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; def s[0:5]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_mov_b32 s10, s0
+; GFX940-NEXT:    s_mov_b32 s11, s1
+; GFX940-NEXT:    s_mov_b32 s12, s0
+; GFX940-NEXT:    s_mov_b32 s13, s1
+; GFX940-NEXT:    s_mov_b32 s14, s0
+; GFX940-NEXT:    s_mov_b32 s15, s1
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; use s[8:15]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x i64> asm "; def $0", "=s"()
   %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0>
   call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -9022,22 +9141,56 @@ define void @s_shuffle_v4i64_v3i64__2_0_0_0() {
 }
 
 define void @s_shuffle_v4i64_v3i64__3_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_0_0_0:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; def s[8:13]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s10, s8
-; GFX9-NEXT:    s_mov_b32 s11, s9
-; GFX9-NEXT:    s_mov_b32 s12, s8
-; GFX9-NEXT:    s_mov_b32 s13, s9
-; GFX9-NEXT:    s_mov_b32 s14, s8
-; GFX9-NEXT:    s_mov_b32 s15, s9
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; use s[8:15]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__3_0_0_0:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; def s[4:9]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_mov_b32 s10, s4
+; GFX900-NEXT:    s_mov_b32 s11, s5
+; GFX900-NEXT:    s_mov_b32 s12, s4
+; GFX900-NEXT:    s_mov_b32 s13, s5
+; GFX900-NEXT:    s_mov_b32 s14, s4
+; GFX900-NEXT:    s_mov_b32 s15, s5
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; use s[8:15]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__3_0_0_0:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def s[4:9]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_mov_b32 s10, s4
+; GFX90A-NEXT:    s_mov_b32 s11, s5
+; GFX90A-NEXT:    s_mov_b32 s12, s4
+; GFX90A-NEXT:    s_mov_b32 s13, s5
+; GFX90A-NEXT:    s_mov_b32 s14, s4
+; GFX90A-NEXT:    s_mov_b32 s15, s5
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; use s[8:15]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: s_shuffle_v4i64_v3i64__3_0_0_0:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; def s[0:5]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_mov_b32 s10, s0
+; GFX940-NEXT:    s_mov_b32 s11, s1
+; GFX940-NEXT:    s_mov_b32 s12, s0
+; GFX940-NEXT:    s_mov_b32 s13, s1
+; GFX940-NEXT:    s_mov_b32 s14, s0
+; GFX940-NEXT:    s_mov_b32 s15, s1
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; use s[8:15]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x i64> asm "; def $0", "=s"()
   %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
   call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -9964,8 +10117,6 @@ define void @s_shuffle_v4i64_v3i64__u_1_1_1() {
 ; GFX9-NEXT:    ;;#ASMSTART
 ; GFX9-NEXT:    ; def s[8:13]
 ; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s8, s10
-; GFX9-NEXT:    s_mov_b32 s9, s11
 ; GFX9-NEXT:    s_mov_b32 s12, s10
 ; GFX9-NEXT:    s_mov_b32 s13, s11
 ; GFX9-NEXT:    s_mov_b32 s14, s10
@@ -10054,8 +10205,6 @@ define void @s_shuffle_v4i64_v3i64__3_1_1_1() {
 ; GFX9-NEXT:    ;;#ASMSTART
 ; GFX9-NEXT:    ; def s[8:13]
 ; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s8, s10
-; GFX9-NEXT:    s_mov_b32 s9, s11
 ; GFX9-NEXT:    s_mov_b32 s12, s10
 ; GFX9-NEXT:    s_mov_b32 s13, s11
 ; GFX9-NEXT:    s_mov_b32 s14, s10
@@ -10968,8 +11117,6 @@ define void @s_shuffle_v4i64_v3i64__u_2_2_2() {
 ; GFX9-NEXT:    ;;#ASMSTART
 ; GFX9-NEXT:    ; def s[8:13]
 ; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s8, s12
-; GFX9-NEXT:    s_mov_b32 s9, s13
 ; GFX9-NEXT:    s_mov_b32 s10, s12
 ; GFX9-NEXT:    s_mov_b32 s11, s13
 ; GFX9-NEXT:    s_mov_b32 s14, s12
@@ -11058,8 +11205,6 @@ define void @s_shuffle_v4i64_v3i64__3_2_2_2() {
 ; GFX9-NEXT:    ;;#ASMSTART
 ; GFX9-NEXT:    ; def s[8:13]
 ; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s8, s12
-; GFX9-NEXT:    s_mov_b32 s9, s13
 ; GFX9-NEXT:    s_mov_b32 s10, s12
 ; GFX9-NEXT:    s_mov_b32 s11, s13
 ; GFX9-NEXT:    s_mov_b32 s14, s12
@@ -11992,22 +12137,44 @@ define void @s_shuffle_v4i64_v3i64__0_3_3_3() {
 }
 
 define void @s_shuffle_v4i64_v3i64__1_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__1_3_3_3:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; def s[8:13]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s8, s10
-; GFX9-NEXT:    s_mov_b32 s9, s11
-; GFX9-NEXT:    s_mov_b32 s12, s10
-; GFX9-NEXT:    s_mov_b32 s13, s11
-; GFX9-NEXT:    s_mov_b32 s14, s10
-; GFX9-NEXT:    s_mov_b32 s15, s11
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; use s[8:15]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__1_3_3_3:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; def s[4:9]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_mov_b32 s8, s6
+; GFX900-NEXT:    s_mov_b32 s9, s7
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; use s[8:15]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_3_3_3:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def s[4:9]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_mov_b32 s8, s6
+; GFX90A-NEXT:    s_mov_b32 s9, s7
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; use s[8:15]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: s_shuffle_v4i64_v3i64__1_3_3_3:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; def s[0:5]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_mov_b32 s8, s2
+; GFX940-NEXT:    s_mov_b32 s9, s3
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; use s[8:15]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x i64> asm "; def $0", "=s"()
   %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
   call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -12015,22 +12182,40 @@ define void @s_shuffle_v4i64_v3i64__1_3_3_3() {
 }
 
 define void @s_shuffle_v4i64_v3i64__2_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__2_3_3_3:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; def s[8:13]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s8, s12
-; GFX9-NEXT:    s_mov_b32 s9, s13
-; GFX9-NEXT:    s_mov_b32 s10, s12
-; GFX9-NEXT:    s_mov_b32 s11, s13
-; GFX9-NEXT:    s_mov_b32 s14, s12
-; GFX9-NEXT:    s_mov_b32 s15, s13
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; use s[8:15]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__2_3_3_3:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; def s[4:9]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; use s[8:15]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__2_3_3_3:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def s[4:9]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; use s[8:15]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: s_shuffle_v4i64_v3i64__2_3_3_3:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; def s[0:5]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_mov_b32 s8, s4
+; GFX940-NEXT:    s_mov_b32 s9, s5
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; use s[8:15]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x i64> asm "; def $0", "=s"()
   %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
   call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -12898,8 +13083,6 @@ define void @s_shuffle_v4i64_v3i64__u_4_4_4() {
 ; GFX9-NEXT:    ;;#ASMSTART
 ; GFX9-NEXT:    ; def s[8:13]
 ; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s8, s10
-; GFX9-NEXT:    s_mov_b32 s9, s11
 ; GFX9-NEXT:    s_mov_b32 s12, s10
 ; GFX9-NEXT:    s_mov_b32 s13, s11
 ; GFX9-NEXT:    s_mov_b32 s14, s10
@@ -13871,8 +14054,6 @@ define void @s_shuffle_v4i64_v3i64__u_5_5_5() {
 ; GFX9-NEXT:    ;;#ASMSTART
 ; GFX9-NEXT:    ; def s[8:13]
 ; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s8, s12
-; GFX9-NEXT:    s_mov_b32 s9, s13
 ; GFX9-NEXT:    s_mov_b32 s10, s12
 ; GFX9-NEXT:    s_mov_b32 s11, s13
 ; GFX9-NEXT:    s_mov_b32 s14, s12
@@ -14144,8 +14325,6 @@ define void @s_shuffle_v4i64_v3i64__5_u_5_5() {
 ; GFX9-NEXT:    ;;#ASMEND
 ; GFX9-NEXT:    s_mov_b32 s8, s12
 ; GFX9-NEXT:    s_mov_b32 s9, s13
-; GFX9-NEXT:    s_mov_b32 s10, s12
-; GFX9-NEXT:    s_mov_b32 s11, s13
 ; GFX9-NEXT:    s_mov_b32 s14, s12
 ; GFX9-NEXT:    s_mov_b32 s15, s13
 ; GFX9-NEXT:    ;;#ASMSTART
@@ -14447,22 +14626,56 @@ define void @s_shuffle_v4i64_v3i64__5_4_5_5() {
 }
 
 define void @s_shuffle_v4i64_v3i64__5_5_u_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_u_5:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; def s[8:13]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s8, s12
-; GFX9-NEXT:    s_mov_b32 s9, s13
-; GFX9-NEXT:    s_mov_b32 s10, s12
-; GFX9-NEXT:    s_mov_b32 s11, s13
-; GFX9-NEXT:    s_mov_b32 s14, s12
-; GFX9-NEXT:    s_mov_b32 s15, s13
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; use s[8:15]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_5:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; def s[8:13]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_mov_b32 s8, s12
+; GFX900-NEXT:    s_mov_b32 s9, s13
+; GFX900-NEXT:    s_mov_b32 s10, s12
+; GFX900-NEXT:    s_mov_b32 s11, s13
+; GFX900-NEXT:    s_mov_b32 s14, s12
+; GFX900-NEXT:    s_mov_b32 s15, s13
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; use s[8:15]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_5:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def s[8:13]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_mov_b32 s8, s12
+; GFX90A-NEXT:    s_mov_b32 s9, s13
+; GFX90A-NEXT:    s_mov_b32 s10, s12
+; GFX90A-NEXT:    s_mov_b32 s11, s13
+; GFX90A-NEXT:    s_mov_b32 s14, s12
+; GFX90A-NEXT:    s_mov_b32 s15, s13
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; use s[8:15]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_5:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; def s[0:5]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_mov_b32 s8, s4
+; GFX940-NEXT:    s_mov_b32 s9, s5
+; GFX940-NEXT:    s_mov_b32 s10, s4
+; GFX940-NEXT:    s_mov_b32 s11, s5
+; GFX940-NEXT:    s_mov_b32 s14, s4
+; GFX940-NEXT:    s_mov_b32 s15, s5
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; use s[8:15]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x i64> asm "; def $0", "=s"()
   %vec1 = call <3 x i64> asm "; def $0", "=s"()
   %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 5>
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll
index 2b46616c87f0d..7a509ffb8c159 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll
@@ -61,13 +61,10 @@ define void @v_shuffle_v4p0_v3p0__1_u_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX900-LABEL: v_shuffle_v4p0_v3p0__1_u_u_u:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX900-NEXT:    ;;#ASMSTART
 ; GFX900-NEXT:    ; def v[0:5]
 ; GFX900-NEXT:    ;;#ASMEND
-; GFX900-NEXT:    v_mov_b32_e32 v6, 0
-; GFX900-NEXT:    v_mov_b32_e32 v4, v2
-; GFX900-NEXT:    v_mov_b32_e32 v5, v3
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
 ; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
@@ -75,13 +72,10 @@ define void @v_shuffle_v4p0_v3p0__1_u_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX90A-LABEL: v_shuffle_v4p0_v3p0__1_u_u_u:
 ; GFX90A:       ; %bb.0:
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def v[0:5]
 ; GFX90A-NEXT:    ;;#ASMEND
-; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
-; GFX90A-NEXT:    v_mov_b32_e32 v4, v2
-; GFX90A-NEXT:    v_mov_b32_e32 v5, v3
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
 ; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
@@ -89,13 +83,10 @@ define void @v_shuffle_v4p0_v3p0__1_u_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX940-LABEL: v_shuffle_v4p0_v3p0__1_u_u_u:
 ; GFX940:       ; %bb.0:
 ; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX940-NEXT:    ;;#ASMSTART
 ; GFX940-NEXT:    ; def v[0:5]
 ; GFX940-NEXT:    ;;#ASMEND
-; GFX940-NEXT:    v_mov_b32_e32 v6, 0
-; GFX940-NEXT:    v_mov_b32_e32 v4, v2
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
 ; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
@@ -113,10 +104,9 @@ define void @v_shuffle_v4p0_v3p0__2_u_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX900-NEXT:    ; def v[0:5]
 ; GFX900-NEXT:    ;;#ASMEND
 ; GFX900-NEXT:    v_mov_b32_e32 v6, 0
-; GFX900-NEXT:    v_mov_b32_e32 v2, v4
-; GFX900-NEXT:    v_mov_b32_e32 v3, v5
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT:    v_mov_b32_e32 v0, v4
+; GFX900-NEXT:    v_mov_b32_e32 v1, v5
+; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -127,10 +117,9 @@ define void @v_shuffle_v4p0_v3p0__2_u_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX90A-NEXT:    ; def v[0:5]
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
-; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
-; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
+; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
+; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -141,10 +130,9 @@ define void @v_shuffle_v4p0_v3p0__2_u_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX940-NEXT:    ; def v[0:5]
 ; GFX940-NEXT:    ;;#ASMEND
 ; GFX940-NEXT:    v_mov_b32_e32 v6, 0
-; GFX940-NEXT:    v_mov_b32_e32 v2, v4
-; GFX940-NEXT:    v_mov_b32_e32 v3, v5
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
+; GFX940-NEXT:    v_mov_b32_e32 v0, v4
+; GFX940-NEXT:    v_mov_b32_e32 v1, v5
+; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -168,13 +156,10 @@ define void @v_shuffle_v4p0_v3p0__4_u_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX900-LABEL: v_shuffle_v4p0_v3p0__4_u_u_u:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX900-NEXT:    ;;#ASMSTART
 ; GFX900-NEXT:    ; def v[0:5]
 ; GFX900-NEXT:    ;;#ASMEND
-; GFX900-NEXT:    v_mov_b32_e32 v6, 0
-; GFX900-NEXT:    v_mov_b32_e32 v4, v2
-; GFX900-NEXT:    v_mov_b32_e32 v5, v3
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
 ; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
@@ -182,13 +167,10 @@ define void @v_shuffle_v4p0_v3p0__4_u_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX90A-LABEL: v_shuffle_v4p0_v3p0__4_u_u_u:
 ; GFX90A:       ; %bb.0:
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def v[0:5]
 ; GFX90A-NEXT:    ;;#ASMEND
-; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
-; GFX90A-NEXT:    v_mov_b32_e32 v4, v2
-; GFX90A-NEXT:    v_mov_b32_e32 v5, v3
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
 ; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
@@ -196,13 +178,10 @@ define void @v_shuffle_v4p0_v3p0__4_u_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX940-LABEL: v_shuffle_v4p0_v3p0__4_u_u_u:
 ; GFX940:       ; %bb.0:
 ; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX940-NEXT:    ;;#ASMSTART
 ; GFX940-NEXT:    ; def v[0:5]
 ; GFX940-NEXT:    ;;#ASMEND
-; GFX940-NEXT:    v_mov_b32_e32 v6, 0
-; GFX940-NEXT:    v_mov_b32_e32 v4, v2
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
 ; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
@@ -221,10 +200,9 @@ define void @v_shuffle_v4p0_v3p0__5_u_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX900-NEXT:    ; def v[0:5]
 ; GFX900-NEXT:    ;;#ASMEND
 ; GFX900-NEXT:    v_mov_b32_e32 v6, 0
-; GFX900-NEXT:    v_mov_b32_e32 v2, v4
-; GFX900-NEXT:    v_mov_b32_e32 v3, v5
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT:    v_mov_b32_e32 v0, v4
+; GFX900-NEXT:    v_mov_b32_e32 v1, v5
+; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -235,10 +213,9 @@ define void @v_shuffle_v4p0_v3p0__5_u_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX90A-NEXT:    ; def v[0:5]
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
-; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
-; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
+; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
+; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -249,10 +226,9 @@ define void @v_shuffle_v4p0_v3p0__5_u_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX940-NEXT:    ; def v[0:5]
 ; GFX940-NEXT:    ;;#ASMEND
 ; GFX940-NEXT:    v_mov_b32_e32 v6, 0
-; GFX940-NEXT:    v_mov_b32_e32 v2, v4
-; GFX940-NEXT:    v_mov_b32_e32 v3, v5
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
+; GFX940-NEXT:    v_mov_b32_e32 v0, v4
+; GFX940-NEXT:    v_mov_b32_e32 v1, v5
+; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -544,7 +520,6 @@ define void @v_shuffle_v4p0_v3p0__5_5_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX900-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX900-NEXT:    v_mov_b32_e32 v2, v4
 ; GFX900-NEXT:    v_mov_b32_e32 v3, v5
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
 ; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
@@ -558,7 +533,6 @@ define void @v_shuffle_v4p0_v3p0__5_5_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
 ; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
 ; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
@@ -572,7 +546,6 @@ define void @v_shuffle_v4p0_v3p0__5_5_u_u(ptr addrspace(1) inreg %ptr) {
 ; GFX940-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX940-NEXT:    v_mov_b32_e32 v2, v4
 ; GFX940-NEXT:    v_mov_b32_e32 v3, v5
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
 ; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
@@ -875,9 +848,12 @@ define void @v_shuffle_v4p0_v3p0__5_5_5_u(ptr addrspace(1) inreg %ptr) {
 ; GFX900-NEXT:    ; def v[0:5]
 ; GFX900-NEXT:    ;;#ASMEND
 ; GFX900-NEXT:    v_mov_b32_e32 v6, 0
+; GFX900-NEXT:    v_mov_b32_e32 v0, v4
+; GFX900-NEXT:    v_mov_b32_e32 v1, v5
+; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX900-NEXT:    s_nop 0
 ; GFX900-NEXT:    v_mov_b32_e32 v2, v4
 ; GFX900-NEXT:    v_mov_b32_e32 v3, v5
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
 ; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
@@ -889,9 +865,12 @@ define void @v_shuffle_v4p0_v3p0__5_5_5_u(ptr addrspace(1) inreg %ptr) {
 ; GFX90A-NEXT:    ; def v[0:5]
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
+; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
+; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
+; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT:    s_nop 0
 ; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
 ; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
 ; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
@@ -903,9 +882,12 @@ define void @v_shuffle_v4p0_v3p0__5_5_5_u(ptr addrspace(1) inreg %ptr) {
 ; GFX940-NEXT:    ; def v[0:5]
 ; GFX940-NEXT:    ;;#ASMEND
 ; GFX940-NEXT:    v_mov_b32_e32 v6, 0
+; GFX940-NEXT:    v_mov_b32_e32 v0, v4
+; GFX940-NEXT:    v_mov_b32_e32 v1, v5
+; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
+; GFX940-NEXT:    s_nop 1
 ; GFX940-NEXT:    v_mov_b32_e32 v2, v4
 ; GFX940-NEXT:    v_mov_b32_e32 v3, v5
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
 ; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
@@ -2449,7 +2431,7 @@ define void @v_shuffle_v4p0_v3p0__u_1_1_1(ptr addrspace(1) inreg %ptr) {
 ; GFX900-NEXT:    v_mov_b32_e32 v4, v2
 ; GFX900-NEXT:    v_mov_b32_e32 v5, v3
 ; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2463,7 +2445,7 @@ define void @v_shuffle_v4p0_v3p0__u_1_1_1(ptr addrspace(1) inreg %ptr) {
 ; GFX90A-NEXT:    v_mov_b32_e32 v4, v2
 ; GFX90A-NEXT:    v_mov_b32_e32 v5, v3
 ; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2477,7 +2459,7 @@ define void @v_shuffle_v4p0_v3p0__u_1_1_1(ptr addrspace(1) inreg %ptr) {
 ; GFX940-NEXT:    v_mov_b32_e32 v4, v2
 ; GFX940-NEXT:    v_mov_b32_e32 v5, v3
 ; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
+; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -2650,7 +2632,7 @@ define void @v_shuffle_v4p0_v3p0__3_1_1_1(ptr addrspace(1) inreg %ptr) {
 ; GFX900-NEXT:    v_mov_b32_e32 v4, v2
 ; GFX900-NEXT:    v_mov_b32_e32 v5, v3
 ; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2664,7 +2646,7 @@ define void @v_shuffle_v4p0_v3p0__3_1_1_1(ptr addrspace(1) inreg %ptr) {
 ; GFX90A-NEXT:    v_mov_b32_e32 v4, v2
 ; GFX90A-NEXT:    v_mov_b32_e32 v5, v3
 ; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2678,7 +2660,7 @@ define void @v_shuffle_v4p0_v3p0__3_1_1_1(ptr addrspace(1) inreg %ptr) {
 ; GFX940-NEXT:    v_mov_b32_e32 v4, v2
 ; GFX940-NEXT:    v_mov_b32_e32 v5, v3
 ; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
+; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -4691,13 +4673,10 @@ define void @v_shuffle_v4p0_v3p0__1_3_3_3(ptr addrspace(1) inreg %ptr) {
 ; GFX900-LABEL: v_shuffle_v4p0_v3p0__1_3_3_3:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX900-NEXT:    ;;#ASMSTART
 ; GFX900-NEXT:    ; def v[0:5]
 ; GFX900-NEXT:    ;;#ASMEND
-; GFX900-NEXT:    v_mov_b32_e32 v6, 0
-; GFX900-NEXT:    v_mov_b32_e32 v4, v2
-; GFX900-NEXT:    v_mov_b32_e32 v5, v3
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
 ; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
@@ -4705,13 +4684,10 @@ define void @v_shuffle_v4p0_v3p0__1_3_3_3(ptr addrspace(1) inreg %ptr) {
 ; GFX90A-LABEL: v_shuffle_v4p0_v3p0__1_3_3_3:
 ; GFX90A:       ; %bb.0:
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def v[0:5]
 ; GFX90A-NEXT:    ;;#ASMEND
-; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
-; GFX90A-NEXT:    v_mov_b32_e32 v4, v2
-; GFX90A-NEXT:    v_mov_b32_e32 v5, v3
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
 ; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
@@ -4719,13 +4695,10 @@ define void @v_shuffle_v4p0_v3p0__1_3_3_3(ptr addrspace(1) inreg %ptr) {
 ; GFX940-LABEL: v_shuffle_v4p0_v3p0__1_3_3_3:
 ; GFX940:       ; %bb.0:
 ; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX940-NEXT:    ;;#ASMSTART
 ; GFX940-NEXT:    ; def v[0:5]
 ; GFX940-NEXT:    ;;#ASMEND
-; GFX940-NEXT:    v_mov_b32_e32 v6, 0
-; GFX940-NEXT:    v_mov_b32_e32 v4, v2
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
 ; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
@@ -4743,10 +4716,9 @@ define void @v_shuffle_v4p0_v3p0__2_3_3_3(ptr addrspace(1) inreg %ptr) {
 ; GFX900-NEXT:    ; def v[0:5]
 ; GFX900-NEXT:    ;;#ASMEND
 ; GFX900-NEXT:    v_mov_b32_e32 v6, 0
-; GFX900-NEXT:    v_mov_b32_e32 v2, v4
-; GFX900-NEXT:    v_mov_b32_e32 v3, v5
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT:    v_mov_b32_e32 v0, v4
+; GFX900-NEXT:    v_mov_b32_e32 v1, v5
+; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4757,10 +4729,9 @@ define void @v_shuffle_v4p0_v3p0__2_3_3_3(ptr addrspace(1) inreg %ptr) {
 ; GFX90A-NEXT:    ; def v[0:5]
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
-; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
-; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
+; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
+; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4771,10 +4742,9 @@ define void @v_shuffle_v4p0_v3p0__2_3_3_3(ptr addrspace(1) inreg %ptr) {
 ; GFX940-NEXT:    ; def v[0:5]
 ; GFX940-NEXT:    ;;#ASMEND
 ; GFX940-NEXT:    v_mov_b32_e32 v6, 0
-; GFX940-NEXT:    v_mov_b32_e32 v2, v4
-; GFX940-NEXT:    v_mov_b32_e32 v3, v5
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
+; GFX940-NEXT:    v_mov_b32_e32 v0, v4
+; GFX940-NEXT:    v_mov_b32_e32 v1, v5
+; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -5609,7 +5579,7 @@ define void @v_shuffle_v4p0_v3p0__u_4_4_4(ptr addrspace(1) inreg %ptr) {
 ; GFX900-NEXT:    v_mov_b32_e32 v4, v2
 ; GFX900-NEXT:    v_mov_b32_e32 v5, v3
 ; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -5623,7 +5593,7 @@ define void @v_shuffle_v4p0_v3p0__u_4_4_4(ptr addrspace(1) inreg %ptr) {
 ; GFX90A-NEXT:    v_mov_b32_e32 v4, v2
 ; GFX90A-NEXT:    v_mov_b32_e32 v5, v3
 ; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -5637,7 +5607,7 @@ define void @v_shuffle_v4p0_v3p0__u_4_4_4(ptr addrspace(1) inreg %ptr) {
 ; GFX940-NEXT:    v_mov_b32_e32 v4, v2
 ; GFX940-NEXT:    v_mov_b32_e32 v5, v3
 ; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
+; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -7046,8 +7016,10 @@ define void @v_shuffle_v4p0_v3p0__5_u_5_5(ptr addrspace(1) inreg %ptr) {
 ; GFX900-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX900-NEXT:    v_mov_b32_e32 v2, v4
 ; GFX900-NEXT:    v_mov_b32_e32 v3, v5
+; GFX900-NEXT:    v_mov_b32_e32 v0, v4
+; GFX900-NEXT:    v_mov_b32_e32 v1, v5
 ; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -7060,8 +7032,10 @@ define void @v_shuffle_v4p0_v3p0__5_u_5_5(ptr addrspace(1) inreg %ptr) {
 ; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
 ; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
+; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
+; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
 ; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -7074,8 +7048,10 @@ define void @v_shuffle_v4p0_v3p0__5_u_5_5(ptr addrspace(1) inreg %ptr) {
 ; GFX940-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX940-NEXT:    v_mov_b32_e32 v2, v4
 ; GFX940-NEXT:    v_mov_b32_e32 v3, v5
+; GFX940-NEXT:    v_mov_b32_e32 v0, v4
+; GFX940-NEXT:    v_mov_b32_e32 v1, v5
 ; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
+; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -7408,13 +7384,14 @@ define void @v_shuffle_v4p0_v3p0__5_5_u_5(ptr addrspace(1) inreg %ptr) {
 ; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_5_u_5:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX900-NEXT:    ;;#ASMSTART
 ; GFX900-NEXT:    ; def v[0:5]
 ; GFX900-NEXT:    ;;#ASMEND
-; GFX900-NEXT:    v_mov_b32_e32 v6, 0
+; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT:    s_nop 0
 ; GFX900-NEXT:    v_mov_b32_e32 v2, v4
 ; GFX900-NEXT:    v_mov_b32_e32 v3, v5
-; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
 ; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
@@ -7422,13 +7399,14 @@ define void @v_shuffle_v4p0_v3p0__5_5_u_5(ptr addrspace(1) inreg %ptr) {
 ; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_5_u_5:
 ; GFX90A:       ; %bb.0:
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def v[0:5]
 ; GFX90A-NEXT:    ;;#ASMEND
-; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
+; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT:    s_nop 0
 ; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
 ; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
-; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
 ; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
@@ -7436,13 +7414,14 @@ define void @v_shuffle_v4p0_v3p0__5_5_u_5(ptr addrspace(1) inreg %ptr) {
 ; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_u_5:
 ; GFX940:       ; %bb.0:
 ; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX940-NEXT:    ;;#ASMSTART
 ; GFX940-NEXT:    ; def v[0:5]
 ; GFX940-NEXT:    ;;#ASMEND
-; GFX940-NEXT:    v_mov_b32_e32 v6, 0
+; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
+; GFX940-NEXT:    s_nop 1
 ; GFX940-NEXT:    v_mov_b32_e32 v2, v4
 ; GFX940-NEXT:    v_mov_b32_e32 v3, v5
-; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
 ; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
@@ -7807,22 +7786,44 @@ define void @s_shuffle_v4p0_v3p0__0_u_u_u() {
 }
 
 define void @s_shuffle_v4p0_v3p0__1_u_u_u() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__1_u_u_u:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; def s[8:13]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s8, s10
-; GFX9-NEXT:    s_mov_b32 s9, s11
-; GFX9-NEXT:    s_mov_b32 s12, s10
-; GFX9-NEXT:    s_mov_b32 s13, s11
-; GFX9-NEXT:    s_mov_b32 s14, s10
-; GFX9-NEXT:    s_mov_b32 s15, s11
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; use s[8:15]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__1_u_u_u:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; def s[4:9]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_mov_b32 s8, s6
+; GFX900-NEXT:    s_mov_b32 s9, s7
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; use s[8:15]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__1_u_u_u:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def s[4:9]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_mov_b32 s8, s6
+; GFX90A-NEXT:    s_mov_b32 s9, s7
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; use s[8:15]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: s_shuffle_v4p0_v3p0__1_u_u_u:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; def s[0:5]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_mov_b32 s8, s2
+; GFX940-NEXT:    s_mov_b32 s9, s3
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; use s[8:15]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x ptr> asm "; def $0", "=s"()
   %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
   call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -7830,59 +7831,99 @@ define void @s_shuffle_v4p0_v3p0__1_u_u_u() {
 }
 
 define void @s_shuffle_v4p0_v3p0__2_u_u_u() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__2_u_u_u:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; def s[8:13]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s8, s12
-; GFX9-NEXT:    s_mov_b32 s9, s13
-; GFX9-NEXT:    s_mov_b32 s10, s12
-; GFX9-NEXT:    s_mov_b32 s11, s13
-; GFX9-NEXT:    s_mov_b32 s14, s12
-; GFX9-NEXT:    s_mov_b32 s15, s13
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; use s[8:15]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-  %vec0 = call <3 x ptr> asm "; def $0", "=s"()
-  %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
-  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
-  ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__3_u_u_u() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__3_u_u_u:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; use s[8:15]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-  %vec0 = call <3 x ptr> asm "; def $0", "=s"()
-  %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
-  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
-  ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__4_u_u_u() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__4_u_u_u:
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__2_u_u_u:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; def s[4:9]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; use s[8:15]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__2_u_u_u:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def s[4:9]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; use s[8:15]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: s_shuffle_v4p0_v3p0__2_u_u_u:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; def s[0:5]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_mov_b32 s8, s4
+; GFX940-NEXT:    s_mov_b32 s9, s5
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; use s[8:15]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+  %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+  %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
+  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+  ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__3_u_u_u() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__3_u_u_u:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; def s[8:13]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s8, s10
-; GFX9-NEXT:    s_mov_b32 s9, s11
-; GFX9-NEXT:    s_mov_b32 s12, s10
-; GFX9-NEXT:    s_mov_b32 s13, s11
-; GFX9-NEXT:    s_mov_b32 s14, s10
-; GFX9-NEXT:    s_mov_b32 s15, s11
-; GFX9-NEXT:    ;;#ASMSTART
 ; GFX9-NEXT:    ; use s[8:15]
 ; GFX9-NEXT:    ;;#ASMEND
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+  %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
+  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+  ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__4_u_u_u() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__4_u_u_u:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; def s[4:9]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_mov_b32 s8, s6
+; GFX900-NEXT:    s_mov_b32 s9, s7
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; use s[8:15]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__4_u_u_u:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def s[4:9]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_mov_b32 s8, s6
+; GFX90A-NEXT:    s_mov_b32 s9, s7
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; use s[8:15]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: s_shuffle_v4p0_v3p0__4_u_u_u:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; def s[0:5]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_mov_b32 s8, s2
+; GFX940-NEXT:    s_mov_b32 s9, s3
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; use s[8:15]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x ptr> asm "; def $0", "=s"()
   %vec1 = call <3 x ptr> asm "; def $0", "=s"()
   %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 4, i32 poison, i32 poison, i32 poison>
@@ -7891,22 +7932,40 @@ define void @s_shuffle_v4p0_v3p0__4_u_u_u() {
 }
 
 define void @s_shuffle_v4p0_v3p0__5_u_u_u() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_u_u_u:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; def s[8:13]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s8, s12
-; GFX9-NEXT:    s_mov_b32 s9, s13
-; GFX9-NEXT:    s_mov_b32 s10, s12
-; GFX9-NEXT:    s_mov_b32 s11, s13
-; GFX9-NEXT:    s_mov_b32 s14, s12
-; GFX9-NEXT:    s_mov_b32 s15, s13
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; use s[8:15]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_u_u_u:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; def s[4:9]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; use s[8:15]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_u_u_u:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def s[4:9]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; use s[8:15]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_u_u_u:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; def s[0:5]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_mov_b32 s8, s4
+; GFX940-NEXT:    s_mov_b32 s9, s5
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; use s[8:15]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x ptr> asm "; def $0", "=s"()
   %vec1 = call <3 x ptr> asm "; def $0", "=s"()
   %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 poison, i32 poison, i32 poison>
@@ -8150,22 +8209,50 @@ define void @s_shuffle_v4p0_v3p0__5_4_u_u() {
 }
 
 define void @s_shuffle_v4p0_v3p0__5_5_u_u() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_u_u:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; def s[8:13]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s8, s12
-; GFX9-NEXT:    s_mov_b32 s9, s13
-; GFX9-NEXT:    s_mov_b32 s10, s12
-; GFX9-NEXT:    s_mov_b32 s11, s13
-; GFX9-NEXT:    s_mov_b32 s14, s12
-; GFX9-NEXT:    s_mov_b32 s15, s13
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; use s[8:15]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_u_u:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; def s[8:13]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_mov_b32 s8, s12
+; GFX900-NEXT:    s_mov_b32 s9, s13
+; GFX900-NEXT:    s_mov_b32 s10, s12
+; GFX900-NEXT:    s_mov_b32 s11, s13
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; use s[8:15]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_u_u:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def s[8:13]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_mov_b32 s8, s12
+; GFX90A-NEXT:    s_mov_b32 s9, s13
+; GFX90A-NEXT:    s_mov_b32 s10, s12
+; GFX90A-NEXT:    s_mov_b32 s11, s13
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; use s[8:15]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_u_u:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; def s[0:5]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_mov_b32 s8, s4
+; GFX940-NEXT:    s_mov_b32 s9, s5
+; GFX940-NEXT:    s_mov_b32 s10, s4
+; GFX940-NEXT:    s_mov_b32 s11, s5
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; use s[8:15]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x ptr> asm "; def $0", "=s"()
   %vec1 = call <3 x ptr> asm "; def $0", "=s"()
   %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 poison>
@@ -8491,8 +8578,6 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_u() {
 ; GFX9-NEXT:    s_mov_b32 s9, s13
 ; GFX9-NEXT:    s_mov_b32 s10, s12
 ; GFX9-NEXT:    s_mov_b32 s11, s13
-; GFX9-NEXT:    s_mov_b32 s14, s12
-; GFX9-NEXT:    s_mov_b32 s15, s13
 ; GFX9-NEXT:    ;;#ASMSTART
 ; GFX9-NEXT:    ; use s[8:15]
 ; GFX9-NEXT:    ;;#ASMEND
@@ -8854,22 +8939,56 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_5() {
 }
 
 define void @s_shuffle_v4p0_v3p0__u_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__u_0_0_0:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; def s[8:13]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s10, s8
-; GFX9-NEXT:    s_mov_b32 s11, s9
-; GFX9-NEXT:    s_mov_b32 s12, s8
-; GFX9-NEXT:    s_mov_b32 s13, s9
-; GFX9-NEXT:    s_mov_b32 s14, s8
-; GFX9-NEXT:    s_mov_b32 s15, s9
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; use s[8:15]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__u_0_0_0:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; def s[4:9]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_mov_b32 s10, s4
+; GFX900-NEXT:    s_mov_b32 s11, s5
+; GFX900-NEXT:    s_mov_b32 s12, s4
+; GFX900-NEXT:    s_mov_b32 s13, s5
+; GFX900-NEXT:    s_mov_b32 s14, s4
+; GFX900-NEXT:    s_mov_b32 s15, s5
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; use s[8:15]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__u_0_0_0:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def s[4:9]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_mov_b32 s10, s4
+; GFX90A-NEXT:    s_mov_b32 s11, s5
+; GFX90A-NEXT:    s_mov_b32 s12, s4
+; GFX90A-NEXT:    s_mov_b32 s13, s5
+; GFX90A-NEXT:    s_mov_b32 s14, s4
+; GFX90A-NEXT:    s_mov_b32 s15, s5
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; use s[8:15]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: s_shuffle_v4p0_v3p0__u_0_0_0:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; def s[0:5]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_mov_b32 s10, s0
+; GFX940-NEXT:    s_mov_b32 s11, s1
+; GFX940-NEXT:    s_mov_b32 s12, s0
+; GFX940-NEXT:    s_mov_b32 s13, s1
+; GFX940-NEXT:    s_mov_b32 s14, s0
+; GFX940-NEXT:    s_mov_b32 s15, s1
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; use s[8:15]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x ptr> asm "; def $0", "=s"()
   %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0>
   call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -9022,22 +9141,56 @@ define void @s_shuffle_v4p0_v3p0__2_0_0_0() {
 }
 
 define void @s_shuffle_v4p0_v3p0__3_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__3_0_0_0:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; def s[8:13]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s10, s8
-; GFX9-NEXT:    s_mov_b32 s11, s9
-; GFX9-NEXT:    s_mov_b32 s12, s8
-; GFX9-NEXT:    s_mov_b32 s13, s9
-; GFX9-NEXT:    s_mov_b32 s14, s8
-; GFX9-NEXT:    s_mov_b32 s15, s9
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; use s[8:15]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__3_0_0_0:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; def s[4:9]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_mov_b32 s10, s4
+; GFX900-NEXT:    s_mov_b32 s11, s5
+; GFX900-NEXT:    s_mov_b32 s12, s4
+; GFX900-NEXT:    s_mov_b32 s13, s5
+; GFX900-NEXT:    s_mov_b32 s14, s4
+; GFX900-NEXT:    s_mov_b32 s15, s5
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; use s[8:15]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__3_0_0_0:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def s[4:9]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_mov_b32 s10, s4
+; GFX90A-NEXT:    s_mov_b32 s11, s5
+; GFX90A-NEXT:    s_mov_b32 s12, s4
+; GFX90A-NEXT:    s_mov_b32 s13, s5
+; GFX90A-NEXT:    s_mov_b32 s14, s4
+; GFX90A-NEXT:    s_mov_b32 s15, s5
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; use s[8:15]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: s_shuffle_v4p0_v3p0__3_0_0_0:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; def s[0:5]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_mov_b32 s10, s0
+; GFX940-NEXT:    s_mov_b32 s11, s1
+; GFX940-NEXT:    s_mov_b32 s12, s0
+; GFX940-NEXT:    s_mov_b32 s13, s1
+; GFX940-NEXT:    s_mov_b32 s14, s0
+; GFX940-NEXT:    s_mov_b32 s15, s1
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; use s[8:15]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x ptr> asm "; def $0", "=s"()
   %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
   call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -9964,8 +10117,6 @@ define void @s_shuffle_v4p0_v3p0__u_1_1_1() {
 ; GFX9-NEXT:    ;;#ASMSTART
 ; GFX9-NEXT:    ; def s[8:13]
 ; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s8, s10
-; GFX9-NEXT:    s_mov_b32 s9, s11
 ; GFX9-NEXT:    s_mov_b32 s12, s10
 ; GFX9-NEXT:    s_mov_b32 s13, s11
 ; GFX9-NEXT:    s_mov_b32 s14, s10
@@ -10054,8 +10205,6 @@ define void @s_shuffle_v4p0_v3p0__3_1_1_1() {
 ; GFX9-NEXT:    ;;#ASMSTART
 ; GFX9-NEXT:    ; def s[8:13]
 ; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s8, s10
-; GFX9-NEXT:    s_mov_b32 s9, s11
 ; GFX9-NEXT:    s_mov_b32 s12, s10
 ; GFX9-NEXT:    s_mov_b32 s13, s11
 ; GFX9-NEXT:    s_mov_b32 s14, s10
@@ -10968,8 +11117,6 @@ define void @s_shuffle_v4p0_v3p0__u_2_2_2() {
 ; GFX9-NEXT:    ;;#ASMSTART
 ; GFX9-NEXT:    ; def s[8:13]
 ; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s8, s12
-; GFX9-NEXT:    s_mov_b32 s9, s13
 ; GFX9-NEXT:    s_mov_b32 s10, s12
 ; GFX9-NEXT:    s_mov_b32 s11, s13
 ; GFX9-NEXT:    s_mov_b32 s14, s12
@@ -11058,8 +11205,6 @@ define void @s_shuffle_v4p0_v3p0__3_2_2_2() {
 ; GFX9-NEXT:    ;;#ASMSTART
 ; GFX9-NEXT:    ; def s[8:13]
 ; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s8, s12
-; GFX9-NEXT:    s_mov_b32 s9, s13
 ; GFX9-NEXT:    s_mov_b32 s10, s12
 ; GFX9-NEXT:    s_mov_b32 s11, s13
 ; GFX9-NEXT:    s_mov_b32 s14, s12
@@ -11992,22 +12137,44 @@ define void @s_shuffle_v4p0_v3p0__0_3_3_3() {
 }
 
 define void @s_shuffle_v4p0_v3p0__1_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__1_3_3_3:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; def s[8:13]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s8, s10
-; GFX9-NEXT:    s_mov_b32 s9, s11
-; GFX9-NEXT:    s_mov_b32 s12, s10
-; GFX9-NEXT:    s_mov_b32 s13, s11
-; GFX9-NEXT:    s_mov_b32 s14, s10
-; GFX9-NEXT:    s_mov_b32 s15, s11
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; use s[8:15]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__1_3_3_3:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; def s[4:9]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_mov_b32 s8, s6
+; GFX900-NEXT:    s_mov_b32 s9, s7
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; use s[8:15]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__1_3_3_3:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def s[4:9]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_mov_b32 s8, s6
+; GFX90A-NEXT:    s_mov_b32 s9, s7
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; use s[8:15]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: s_shuffle_v4p0_v3p0__1_3_3_3:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; def s[0:5]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_mov_b32 s8, s2
+; GFX940-NEXT:    s_mov_b32 s9, s3
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; use s[8:15]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x ptr> asm "; def $0", "=s"()
   %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
   call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -12015,22 +12182,40 @@ define void @s_shuffle_v4p0_v3p0__1_3_3_3() {
 }
 
 define void @s_shuffle_v4p0_v3p0__2_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__2_3_3_3:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; def s[8:13]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s8, s12
-; GFX9-NEXT:    s_mov_b32 s9, s13
-; GFX9-NEXT:    s_mov_b32 s10, s12
-; GFX9-NEXT:    s_mov_b32 s11, s13
-; GFX9-NEXT:    s_mov_b32 s14, s12
-; GFX9-NEXT:    s_mov_b32 s15, s13
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; use s[8:15]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__2_3_3_3:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; def s[4:9]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; use s[8:15]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__2_3_3_3:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def s[4:9]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; use s[8:15]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: s_shuffle_v4p0_v3p0__2_3_3_3:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; def s[0:5]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_mov_b32 s8, s4
+; GFX940-NEXT:    s_mov_b32 s9, s5
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; use s[8:15]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x ptr> asm "; def $0", "=s"()
   %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
   call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -12898,8 +13083,6 @@ define void @s_shuffle_v4p0_v3p0__u_4_4_4() {
 ; GFX9-NEXT:    ;;#ASMSTART
 ; GFX9-NEXT:    ; def s[8:13]
 ; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s8, s10
-; GFX9-NEXT:    s_mov_b32 s9, s11
 ; GFX9-NEXT:    s_mov_b32 s12, s10
 ; GFX9-NEXT:    s_mov_b32 s13, s11
 ; GFX9-NEXT:    s_mov_b32 s14, s10
@@ -13871,8 +14054,6 @@ define void @s_shuffle_v4p0_v3p0__u_5_5_5() {
 ; GFX9-NEXT:    ;;#ASMSTART
 ; GFX9-NEXT:    ; def s[8:13]
 ; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s8, s12
-; GFX9-NEXT:    s_mov_b32 s9, s13
 ; GFX9-NEXT:    s_mov_b32 s10, s12
 ; GFX9-NEXT:    s_mov_b32 s11, s13
 ; GFX9-NEXT:    s_mov_b32 s14, s12
@@ -14144,8 +14325,6 @@ define void @s_shuffle_v4p0_v3p0__5_u_5_5() {
 ; GFX9-NEXT:    ;;#ASMEND
 ; GFX9-NEXT:    s_mov_b32 s8, s12
 ; GFX9-NEXT:    s_mov_b32 s9, s13
-; GFX9-NEXT:    s_mov_b32 s10, s12
-; GFX9-NEXT:    s_mov_b32 s11, s13
 ; GFX9-NEXT:    s_mov_b32 s14, s12
 ; GFX9-NEXT:    s_mov_b32 s15, s13
 ; GFX9-NEXT:    ;;#ASMSTART
@@ -14447,22 +14626,56 @@ define void @s_shuffle_v4p0_v3p0__5_4_5_5() {
 }
 
 define void @s_shuffle_v4p0_v3p0__5_5_u_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_u_5:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; def s[8:13]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_mov_b32 s8, s12
-; GFX9-NEXT:    s_mov_b32 s9, s13
-; GFX9-NEXT:    s_mov_b32 s10, s12
-; GFX9-NEXT:    s_mov_b32 s11, s13
-; GFX9-NEXT:    s_mov_b32 s14, s12
-; GFX9-NEXT:    s_mov_b32 s15, s13
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; use s[8:15]
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_u_5:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; def s[8:13]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_mov_b32 s8, s12
+; GFX900-NEXT:    s_mov_b32 s9, s13
+; GFX900-NEXT:    s_mov_b32 s10, s12
+; GFX900-NEXT:    s_mov_b32 s11, s13
+; GFX900-NEXT:    s_mov_b32 s14, s12
+; GFX900-NEXT:    s_mov_b32 s15, s13
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; use s[8:15]
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_u_5:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def s[8:13]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_mov_b32 s8, s12
+; GFX90A-NEXT:    s_mov_b32 s9, s13
+; GFX90A-NEXT:    s_mov_b32 s10, s12
+; GFX90A-NEXT:    s_mov_b32 s11, s13
+; GFX90A-NEXT:    s_mov_b32 s14, s12
+; GFX90A-NEXT:    s_mov_b32 s15, s13
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; use s[8:15]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_u_5:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; def s[0:5]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_mov_b32 s8, s4
+; GFX940-NEXT:    s_mov_b32 s9, s5
+; GFX940-NEXT:    s_mov_b32 s10, s4
+; GFX940-NEXT:    s_mov_b32 s11, s5
+; GFX940-NEXT:    s_mov_b32 s14, s4
+; GFX940-NEXT:    s_mov_b32 s15, s5
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; use s[8:15]
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %vec0 = call <3 x ptr> asm "; def $0", "=s"()
   %vec1 = call <3 x ptr> asm "; def $0", "=s"()
   %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 5>
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-wwm-copies.mir b/llvm/test/CodeGen/AMDGPU/si-lower-wwm-copies.mir
new file mode 100644
index 0000000000000..039b6f9041129
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-wwm-copies.mir
@@ -0,0 +1,43 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=liveintervals,virtregmap,si-lower-wwm-copies -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes="require<live-intervals>,require<virtregmap>,si-lower-wwm-copies" -o - %s | FileCheck %s
+
+# Check both cases: $scc being live and $scc being dead.
+---
+name: lower-wwm-copies
+registers:
+  - { id: 1, class: vgpr_32, flags: [ WWM_REG ]}
+machineFunctionInfo:
+  sgprForEXECCopy: '$sgpr2_sgpr3'
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: lower-wwm-copies
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0, $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $sgpr0 = IMPLICIT_DEF
+  ; CHECK-NEXT:   S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
+  ; CHECK-NEXT:   $sgpr2_sgpr3 = S_MOV_B64 killed $exec
+  ; CHECK-NEXT:   $exec = S_MOV_B64 -1
+  ; CHECK-NEXT:   $vgpr1 = COPY $vgpr0
+  ; CHECK-NEXT:   $exec = S_MOV_B64 killed $sgpr2_sgpr3
+  ; CHECK-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   $vgpr2 = COPY $vgpr1
+  ; CHECK-NEXT:   $exec = S_MOV_B64 killed $sgpr2_sgpr3
+  bb.0:
+    liveins: $vgpr0, $scc
+    $sgpr0 = IMPLICIT_DEF
+    S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
+    $vgpr1 = WWM_COPY $vgpr0
+    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
+
+  bb.1:
+    liveins: $vgpr1
+    $vgpr2 = WWM_COPY $vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/sreg-xnull-regclass-bitwidth.mir b/llvm/test/CodeGen/AMDGPU/sreg-xnull-regclass-bitwidth.mir
new file mode 100644
index 0000000000000..d8d4f5d0220c9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sreg-xnull-regclass-bitwidth.mir
@@ -0,0 +1,15 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=postmisched -o - %s | FileCheck %s
+---
+name:            test_xnull_256
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: test_xnull_256
+    ; CHECK: IMAGE_STORE_V4_V2_gfx90a $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr8_vgpr9, killed $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, 15, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8)
+    ; CHECK-NEXT: $vgpr2 = V_LSHRREV_B32_e32 4, killed $vgpr2, implicit $exec
+  IMAGE_STORE_V4_V2_gfx90a $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr8_vgpr9, $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, 15, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8)
+  $vgpr2 = V_LSHRREV_B32_e32 4, $vgpr2, implicit $exec
+...
+
+
+# FIXME: We need an xnull_128 test case (one that reaches the unreachable in function AMDGPU::getRegBitWidth), but we currently cannot find one.
diff --git a/llvm/test/CodeGen/AMDGPU/wave_dispatch_regs.ll b/llvm/test/CodeGen/AMDGPU/wave_dispatch_regs.ll
index 29520cb7468c7..e3a6240aac005 100644
--- a/llvm/test/CodeGen/AMDGPU/wave_dispatch_regs.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave_dispatch_regs.ll
@@ -12,7 +12,7 @@
 ; GCN-NEXT: amdpal.pipelines:
 ; GCN-NEXT:   - .hardware_stages:
 ; GCN-NEXT:       .cs:
-; GCN-NEXT:         .entry_point:    _amdgpu_cs_main
+; GCN-NEXT:         .entry_point_symbol:    _amdgpu_cs_main
 ; GCN-NEXT:         .scratch_memory_size: 0
 ; SI-NEXT:          .sgpr_count:     0x11
 ; VI-NEXT:          .sgpr_count:     0x60
diff --git a/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll b/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll
index dd03fb62b8ebb..82fae44e20818 100644
--- a/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll
+++ b/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll
@@ -397,7 +397,7 @@ define amdgpu_kernel void @no_widen_i16_constant_divergent_load(ptr addrspace(4)
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    global_load_u16 v0, v0, s[0:1]
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_add_nc_u16 v2, v0, 0x3e7
+; GFX11-NEXT:    v_add_nc_u16 v2, 0x3e7, v0
 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX11-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll
index f6822713022a9..1840b5ce46c6f 100644
--- a/llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -37,6 +37,9 @@
 ; CHECK-NEXT:      Block Frequency Analysis
 ; CHECK-NEXT:      Constant Hoisting
 ; CHECK-NEXT:      Replace intrinsics with calls to vector library
+; CHECK-NEXT:      Lazy Branch Probability Analysis
+; CHECK-NEXT:      Lazy Block Frequency Analysis
+; CHECK-NEXT:      Optimization Remark Emitter
 ; CHECK-NEXT:      Partially inline calls to library functions
 ; CHECK-NEXT:      Instrument function entry/exit with calls to e.g. mcount() (post inlining)
 ; CHECK-NEXT:      Scalarize Masked Memory Intrinsics
diff --git a/llvm/test/CodeGen/DirectX/BufferLoad.ll b/llvm/test/CodeGen/DirectX/BufferLoad.ll
index 86e2217a8e76f..3619f0aa63c5b 100644
--- a/llvm/test/CodeGen/DirectX/BufferLoad.ll
+++ b/llvm/test/CodeGen/DirectX/BufferLoad.ll
@@ -16,7 +16,7 @@ define void @loadv4f32() {
   ; The temporary casts should all have been cleaned up
   ; CHECK-NOT: %dx.resource.casthandle
 
-  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
+  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef) #[[#ATTR:]]
   %load0 = call {<4 x float>, i1} @llvm.dx.resource.load.typedbuffer(
       target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 0)
   %data0 = extractvalue {<4 x float>, i1} %load0, 0
@@ -34,7 +34,7 @@ define void @loadv4f32() {
   call void @scalar_user(float %data0_0)
   call void @scalar_user(float %data0_2)
 
-  ; CHECK: [[DATA4:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 4, i32 undef)
+  ; CHECK: [[DATA4:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 4, i32 undef) #[[#ATTR]]
   %load4 = call {<4 x float>, i1} @llvm.dx.resource.load.typedbuffer(
       target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 4)
   %data4 = extractvalue {<4 x float>, i1} %load4, 0
@@ -49,7 +49,7 @@ define void @loadv4f32() {
   ; CHECK: insertelement <4 x float>
   call void @vector_user(<4 x float> %data4)
 
-  ; CHECK: [[DATA12:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 12, i32 undef)
+  ; CHECK: [[DATA12:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 12, i32 undef) #[[#ATTR]]
   %load12 = call {<4 x float>, i1} @llvm.dx.resource.load.typedbuffer(
       target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 12)
   %data12 = extractvalue {<4 x float>, i1} %load12, 0
@@ -72,7 +72,7 @@ define void @index_dynamic(i32 %bufindex, i32 %elemindex) {
       @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v4f32_0_0_0(
           i32 0, i32 0, i32 1, i32 0, i1 false)
 
-  ; CHECK: [[LOAD:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 %bufindex, i32 undef)
+  ; CHECK: [[LOAD:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 %bufindex, i32 undef) #[[#ATTR]]
   %load = call {<4 x float>, i1} @llvm.dx.resource.load.typedbuffer(
       target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 %bufindex)
   %data = extractvalue {<4 x float>, i1} %load, 0
@@ -108,7 +108,7 @@ define void @loadf32() {
       @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f32_0_0_0(
           i32 0, i32 0, i32 1, i32 0, i1 false)
 
-  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
+  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef) #[[#ATTR]]
   %load0 = call {float, i1} @llvm.dx.resource.load.typedbuffer(
       target("dx.TypedBuffer", float, 0, 0, 0) %buffer, i32 0)
   %data0 = extractvalue {float, i1} %load0, 0
@@ -127,7 +127,7 @@ define void @loadv2f32() {
       @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f32_0_0_0(
           i32 0, i32 0, i32 1, i32 0, i1 false)
 
-  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
+  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef) #[[#ATTR]]
   %data0 = call {<2 x float>, i1} @llvm.dx.resource.load.typedbuffer(
       target("dx.TypedBuffer", <2 x float>, 0, 0, 0) %buffer, i32 0)
 
@@ -141,12 +141,12 @@ define void @loadv4f32_checkbit() {
       @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v4f32_0_0_0(
           i32 0, i32 0, i32 1, i32 0, i1 false)
 
-  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
+  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef) #[[#ATTR]]
   %data0 = call {<4 x float>, i1} @llvm.dx.resource.load.typedbuffer.f32(
       target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 0)
 
   ; CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.f32 [[DATA0]], 4
-  ; CHECK: [[MAPPED:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]
+  ; CHECK: [[MAPPED:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) #[[#ATTR]]
   %check = extractvalue {<4 x float>, i1} %data0, 1
 
   ; CHECK: call void @check_user(i1 [[MAPPED]])
@@ -162,7 +162,7 @@ define void @loadv4i32() {
       @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v4i32_0_0_0(
           i32 0, i32 0, i32 1, i32 0, i1 false)
 
-  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
+  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef) #[[#ATTR]]
   %data0 = call {<4 x i32>, i1} @llvm.dx.resource.load.typedbuffer(
       target("dx.TypedBuffer", <4 x i32>, 0, 0, 0) %buffer, i32 0)
 
@@ -176,7 +176,7 @@ define void @loadv4f16() {
       @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v4f16_0_0_0(
           i32 0, i32 0, i32 1, i32 0, i1 false)
 
-  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f16 @dx.op.bufferLoad.f16(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
+  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f16 @dx.op.bufferLoad.f16(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef) #[[#ATTR]]
   %data0 = call {<4 x half>, i1} @llvm.dx.resource.load.typedbuffer(
       target("dx.TypedBuffer", <4 x half>, 0, 0, 0) %buffer, i32 0)
 
@@ -190,9 +190,11 @@ define void @loadv4i16() {
       @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v4i16_0_0_0(
           i32 0, i32 0, i32 1, i32 0, i1 false)
 
-  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.i16 @dx.op.bufferLoad.i16(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
+  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.i16 @dx.op.bufferLoad.i16(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef) #[[#ATTR]]
   %data0 = call {<4 x i16>, i1} @llvm.dx.resource.load.typedbuffer(
       target("dx.TypedBuffer", <4 x i16>, 0, 0, 0) %buffer, i32 0)
 
   ret void
 }
+
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(read) {{.*}}}
diff --git a/llvm/test/CodeGen/DirectX/BufferStore.ll b/llvm/test/CodeGen/DirectX/BufferStore.ll
index 381df6a63962e..6892228b0d8ae 100644
--- a/llvm/test/CodeGen/DirectX/BufferStore.ll
+++ b/llvm/test/CodeGen/DirectX/BufferStore.ll
@@ -17,7 +17,7 @@ define void @storefloat(<4 x float> %data, i32 %index) {
   ; CHECK: [[DATA0_1:%.*]] = extractelement <4 x float> %data, i32 1
   ; CHECK: [[DATA0_2:%.*]] = extractelement <4 x float> %data, i32 2
   ; CHECK: [[DATA0_3:%.*]] = extractelement <4 x float> %data, i32 3
-  ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, float [[DATA0_0]], float [[DATA0_1]], float [[DATA0_2]], float [[DATA0_3]], i8 15)
+  ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, float [[DATA0_0]], float [[DATA0_1]], float [[DATA0_2]], float [[DATA0_3]], i8 15){{$}}
   call void @llvm.dx.resource.store.typedbuffer(
       target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer,
       i32 %index, <4 x float> %data)
@@ -37,7 +37,7 @@ define void @storeint(<4 x i32> %data, i32 %index) {
   ; CHECK: [[DATA0_1:%.*]] = extractelement <4 x i32> %data, i32 1
   ; CHECK: [[DATA0_2:%.*]] = extractelement <4 x i32> %data, i32 2
   ; CHECK: [[DATA0_3:%.*]] = extractelement <4 x i32> %data, i32 3
-  ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, i32 [[DATA0_0]], i32 [[DATA0_1]], i32 [[DATA0_2]], i32 [[DATA0_3]], i8 15)
+  ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, i32 [[DATA0_0]], i32 [[DATA0_1]], i32 [[DATA0_2]], i32 [[DATA0_3]], i8 15){{$}}
   call void @llvm.dx.resource.store.typedbuffer(
       target("dx.TypedBuffer", <4 x i32>, 1, 0, 0) %buffer,
       i32 %index, <4 x i32> %data)
@@ -60,7 +60,7 @@ define void @storehalf(<4 x half> %data, i32 %index) {
   ; CHECK: [[DATA0_1:%.*]] = extractelement <4 x half> %data, i32 1
   ; CHECK: [[DATA0_2:%.*]] = extractelement <4 x half> %data, i32 2
   ; CHECK: [[DATA0_3:%.*]] = extractelement <4 x half> %data, i32 3
-  ; CHECK: call void @dx.op.bufferStore.f16(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, half [[DATA0_0]], half [[DATA0_1]], half [[DATA0_2]], half [[DATA0_3]], i8 15)
+  ; CHECK: call void @dx.op.bufferStore.f16(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, half [[DATA0_0]], half [[DATA0_1]], half [[DATA0_2]], half [[DATA0_3]], i8 15){{$}}
   call void @llvm.dx.resource.store.typedbuffer(
       target("dx.TypedBuffer", <4 x half>, 1, 0, 0) %buffer,
       i32 %index, <4 x half> %data)
@@ -83,7 +83,7 @@ define void @storei16(<4 x i16> %data, i32 %index) {
   ; CHECK: [[DATA0_1:%.*]] = extractelement <4 x i16> %data, i32 1
   ; CHECK: [[DATA0_2:%.*]] = extractelement <4 x i16> %data, i32 2
   ; CHECK: [[DATA0_3:%.*]] = extractelement <4 x i16> %data, i32 3
-  ; CHECK: call void @dx.op.bufferStore.i16(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, i16 [[DATA0_0]], i16 [[DATA0_1]], i16 [[DATA0_2]], i16 [[DATA0_3]], i8 15)
+  ; CHECK: call void @dx.op.bufferStore.i16(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, i16 [[DATA0_0]], i16 [[DATA0_1]], i16 [[DATA0_2]], i16 [[DATA0_3]], i8 15){{$}}
   call void @llvm.dx.resource.store.typedbuffer(
       target("dx.TypedBuffer", <4 x i16>, 1, 0, 0) %buffer,
       i32 %index, <4 x i16> %data)
diff --git a/llvm/test/CodeGen/DirectX/CreateHandle.ll b/llvm/test/CodeGen/DirectX/CreateHandle.ll
index 80daa879f0f86..c4e02fb02733d 100644
--- a/llvm/test/CodeGen/DirectX/CreateHandle.ll
+++ b/llvm/test/CodeGen/DirectX/CreateHandle.ll
@@ -19,14 +19,14 @@ define void @test_buffers() {
   %typed0 = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0)
               @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v4f32_1_0_0(
                   i32 3, i32 5, i32 1, i32 0, i1 false)
-  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 5, i1 false)
+  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 5, i1 false) #[[#ATTR:]]
   ; CHECK-NOT: @llvm.dx.cast.handle
 
   ; RWBuffer<int> Buf : register(u7, space2)
   %typed1 = call target("dx.TypedBuffer", i32, 1, 0, 1)
       @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0_1t(
           i32 2, i32 7, i32 1, i32 0, i1 false)
-  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 7, i1 false)
+  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 7, i1 false) #[[#ATTR]]
 
   ; Buffer<uint4> Buf[24] : register(t3, space5)
   ; Buffer<uint4> typed2 = Buf[4]
@@ -34,20 +34,20 @@ define void @test_buffers() {
   %typed2 = call target("dx.TypedBuffer", <4 x i32>, 0, 0, 0)
       @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_0_0_0t(
           i32 5, i32 3, i32 24, i32 4, i1 false)
-  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 3, i32 7, i1 false)
+  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 3, i32 7, i1 false) #[[#ATTR]]
 
   ; struct S { float4 a; uint4 b; };
   ; StructuredBuffer<S> Buf : register(t2, space4)
   %struct0 = call target("dx.RawBuffer", {<4 x float>, <4 x i32>}, 0, 0)
       @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_sl_v4f32v4i32s_0_0t(
           i32 4, i32 2, i32 1, i32 0, i1 true)
-  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 2, i32 2, i1 true)
+  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 2, i32 2, i1 true) #[[#ATTR]]
 
   ; ByteAddressBuffer Buf : register(t8, space1)
   %byteaddr0 = call target("dx.RawBuffer", i8, 0, 0)
       @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_0_0t(
           i32 1, i32 8, i32 1, i32 0, i1 false)
-  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 1, i32 8, i1 false)
+  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 1, i32 8, i1 false) #[[#ATTR]]
 
   ; Buffer<float4> Buf[] : register(t7)
   ; Buffer<float4> typed3 = Buf[ix]
@@ -56,11 +56,13 @@ define void @test_buffers() {
       @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v4f32_0_0_0t(
           i32 0, i32 7, i32 -1, i32 %typed3_ix, i1 false)
   ; CHECK: %[[IX:.*]] = add i32 %typed3_ix, 7
-  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 %[[IX]], i1 false)
+  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 %[[IX]], i1 false) #[[#ATTR]]
 
   ret void
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(read) {{.*}}}
+
 ; Just check that we have the right types and number of metadata nodes, the
 ; contents of the metadata are tested elsewhere.
 ;
diff --git a/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll b/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll
index bf11bfa143c93..e0fcd4b2d9ac5 100644
--- a/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll
+++ b/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll
@@ -19,15 +19,15 @@ define void @test_bindings() {
   %typed0 = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0)
               @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v4f32_1_0_0(
                   i32 3, i32 5, i32 1, i32 0, i1 false)
-  ; CHECK: [[BUF0:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 3, i8 1 }, i32 5, i1 false)
-  ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF0]], %dx.types.ResourceProperties { i32 4106, i32 1033 })
+  ; CHECK: [[BUF0:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 3, i8 1 }, i32 5, i1 false) #[[#ATTR:]]
+  ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF0]], %dx.types.ResourceProperties { i32 4106, i32 1033 }) #[[#ATTR]]
 
   ; RWBuffer<int> Buf : register(u7, space2)
   %typed1 = call target("dx.TypedBuffer", i32, 1, 0, 1)
       @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0_0t(
           i32 2, i32 7, i32 1, i32 0, i1 false)
-  ; CHECK: [[BUF1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 7, i32 7, i32 2, i8 1 }, i32 7, i1 false)
-  ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF1]], %dx.types.ResourceProperties { i32 4106, i32 260 })
+  ; CHECK: [[BUF1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 7, i32 7, i32 2, i8 1 }, i32 7, i1 false) #[[#ATTR]]
+  ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF1]], %dx.types.ResourceProperties { i32 4106, i32 260 }) #[[#ATTR]]
 
   ; Buffer<uint4> Buf[24] : register(t3, space5)
   ; Buffer<uint4> typed2 = Buf[4]
@@ -35,23 +35,23 @@ define void @test_bindings() {
   %typed2 = call target("dx.TypedBuffer", <4 x i32>, 0, 0, 0)
       @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_0_0_0t(
           i32 5, i32 3, i32 24, i32 4, i1 false)
-  ; CHECK: [[BUF2:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 26, i32 5, i8 0 }, i32 7, i1 false)
-  ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF2]], %dx.types.ResourceProperties { i32 10, i32 1029 })
+  ; CHECK: [[BUF2:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 26, i32 5, i8 0 }, i32 7, i1 false) #[[#ATTR]]
+  ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF2]], %dx.types.ResourceProperties { i32 10, i32 1029 }) #[[#ATTR]]
 
   ; struct S { float4 a; uint4 b; };
   ; StructuredBuffer<S> Buf : register(t2, space4)
   %struct0 = call target("dx.RawBuffer", {<4 x float>, <4 x i32>}, 0, 0)
       @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_sl_v4f32v4i32s_0_0t(
           i32 4, i32 2, i32 1, i32 0, i1 true)
-  ; CHECK: [[BUF3:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 4, i8 0 }, i32 2, i1 true)
-  ; CHECK: = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF3]], %dx.types.ResourceProperties { i32 1036, i32 32 })
+  ; CHECK: [[BUF3:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 4, i8 0 }, i32 2, i1 true) #[[#ATTR]]
+  ; CHECK: = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF3]], %dx.types.ResourceProperties { i32 1036, i32 32 }) #[[#ATTR]]
 
   ; ByteAddressBuffer Buf : register(t8, space1)
   %byteaddr0 = call target("dx.RawBuffer", i8, 0, 0)
       @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_0_0t(
           i32 1, i32 8, i32 1, i32 0, i1 false)
-  ; CHECK: [[BUF4:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 8, i32 8, i32 1, i8 0 }, i32 8, i1 false)
-  ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF4]], %dx.types.ResourceProperties { i32 11, i32 0 })
+  ; CHECK: [[BUF4:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 8, i32 8, i32 1, i8 0 }, i32 8, i1 false) #[[#ATTR]]
+  ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF4]], %dx.types.ResourceProperties { i32 11, i32 0 }) #[[#ATTR]]
 
   ; Buffer<float4> Buf[] : register(t7)
   ; Buffer<float4> typed3 = Buf[ix]
@@ -60,12 +60,14 @@ define void @test_bindings() {
       @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v4f32_0_0_0t(
           i32 0, i32 7, i32 -1, i32 %typed3_ix, i1 false)
   ; CHECK: %[[IX:.*]] = add i32 %typed3_ix, 7
-  ; CHECK: [[BUF5:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 7, i32 -1, i32 0, i8 0 }, i32 %[[IX]], i1 false)
-  ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF5]], %dx.types.ResourceProperties { i32 10, i32 1033 })
+  ; CHECK: [[BUF5:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 7, i32 -1, i32 0, i8 0 }, i32 %[[IX]], i1 false) #[[#ATTR]]
+  ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF5]], %dx.types.ResourceProperties { i32 10, i32 1033 }) #[[#ATTR]]
 
   ret void
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 ; Just check that we have the right types and number of metadata nodes, the
 ; contents of the metadata are tested elsewhere.
 ;
diff --git a/llvm/test/CodeGen/DirectX/WaveActiveAllTrue.ll b/llvm/test/CodeGen/DirectX/WaveActiveAllTrue.ll
index 7e84f33579a61..4e2f2139f54be 100644
--- a/llvm/test/CodeGen/DirectX/WaveActiveAllTrue.ll
+++ b/llvm/test/CodeGen/DirectX/WaveActiveAllTrue.ll
@@ -2,7 +2,7 @@
 
 define noundef i1 @wave_all_simple(i1 noundef %p1) {
 entry:
-; CHECK: call i1 @dx.op.waveAllTrue(i32 114, i1 %p1)
+; CHECK: call i1 @dx.op.waveAllTrue(i32 114, i1 %p1){{$}}
   %ret = call i1 @llvm.dx.wave.all(i1 %p1)
   ret i1 %ret
 }
diff --git a/llvm/test/CodeGen/DirectX/WaveActiveAnyTrue.ll b/llvm/test/CodeGen/DirectX/WaveActiveAnyTrue.ll
index 5adf050a76c98..38f1ce0f9e9c9 100644
--- a/llvm/test/CodeGen/DirectX/WaveActiveAnyTrue.ll
+++ b/llvm/test/CodeGen/DirectX/WaveActiveAnyTrue.ll
@@ -2,7 +2,7 @@
 
 define noundef i1 @wave_any_simple(i1 noundef %p1) {
 entry:
-; CHECK: call i1 @dx.op.waveAnyTrue(i32 113, i1 %p1)
+; CHECK: call i1 @dx.op.waveAnyTrue(i32 113, i1 %p1){{$}}
   %ret = call i1 @llvm.dx.wave.any(i1 %p1)
   ret i1 %ret
 }
diff --git a/llvm/test/CodeGen/DirectX/WaveActiveCountBits.ll b/llvm/test/CodeGen/DirectX/WaveActiveCountBits.ll
index 5d32137243319..35ca5f2435b1c 100644
--- a/llvm/test/CodeGen/DirectX/WaveActiveCountBits.ll
+++ b/llvm/test/CodeGen/DirectX/WaveActiveCountBits.ll
@@ -2,7 +2,7 @@
 
 define void @main(i1 %expr) {
 entry:
-; CHECK: call i32 @dx.op.waveAllOp(i32 135, i1 %expr)
+; CHECK: call i32 @dx.op.waveAllOp(i32 135, i1 %expr){{$}}
   %0 = call i32 @llvm.dx.wave.active.countbits(i1 %expr)
   ret void
 }
diff --git a/llvm/test/CodeGen/DirectX/WaveGetLaneIndex.ll b/llvm/test/CodeGen/DirectX/WaveGetLaneIndex.ll
index 86b7ea4f962f7..df9fa6fefa132 100644
--- a/llvm/test/CodeGen/DirectX/WaveGetLaneIndex.ll
+++ b/llvm/test/CodeGen/DirectX/WaveGetLaneIndex.ll
@@ -2,9 +2,11 @@
 
 define void @main() {
 entry:
-; CHECK: call i32 @dx.op.waveGetLaneIndex(i32 111)
+; CHECK: call i32 @dx.op.waveGetLaneIndex(i32 111) #[[#ATTR:]]
   %0 = call i32 @llvm.dx.wave.getlaneindex()
   ret void
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(read) {{.*}}}
+
 declare i32 @llvm.dx.wave.getlaneindex()
diff --git a/llvm/test/CodeGen/DirectX/WaveReadLaneAt-vec.ll b/llvm/test/CodeGen/DirectX/WaveReadLaneAt-vec.ll
index 8c2a11a3557af..571f31c3c9c64 100644
--- a/llvm/test/CodeGen/DirectX/WaveReadLaneAt-vec.ll
+++ b/llvm/test/CodeGen/DirectX/WaveReadLaneAt-vec.ll
@@ -5,27 +5,27 @@
 
 define noundef <2 x half> @wave_read_lane_v2half(<2 x half> noundef %expr, i32 %idx) {
 entry:
-; CHECK: call half @dx.op.waveReadLaneAt.f16(i32 117, half %expr.i0, i32 %idx)
-; CHECK: call half @dx.op.waveReadLaneAt.f16(i32 117, half %expr.i1, i32 %idx)
+; CHECK: call half @dx.op.waveReadLaneAt.f16(i32 117, half %expr.i0, i32 %idx){{$}}
+; CHECK: call half @dx.op.waveReadLaneAt.f16(i32 117, half %expr.i1, i32 %idx){{$}}
   %ret = call <2 x half> @llvm.dx.wave.readlane.f16(<2 x half> %expr, i32 %idx)
   ret <2 x half> %ret
 }
 
 define noundef <3 x i32> @wave_read_lane_v3i32(<3 x i32> noundef %expr, i32 %idx) {
 entry:
-; CHECK: call i32 @dx.op.waveReadLaneAt.i32(i32 117, i32 %expr.i0, i32 %idx)
-; CHECK: call i32 @dx.op.waveReadLaneAt.i32(i32 117, i32 %expr.i1, i32 %idx)
-; CHECK: call i32 @dx.op.waveReadLaneAt.i32(i32 117, i32 %expr.i2, i32 %idx)
+; CHECK: call i32 @dx.op.waveReadLaneAt.i32(i32 117, i32 %expr.i0, i32 %idx){{$}}
+; CHECK: call i32 @dx.op.waveReadLaneAt.i32(i32 117, i32 %expr.i1, i32 %idx){{$}}
+; CHECK: call i32 @dx.op.waveReadLaneAt.i32(i32 117, i32 %expr.i2, i32 %idx){{$}}
   %ret = call <3 x i32> @llvm.dx.wave.readlane(<3 x i32> %expr, i32 %idx)
   ret <3 x i32> %ret
 }
 
 define noundef <4 x double> @wave_read_lane_v4f64(<4 x double> noundef %expr, i32 %idx) {
 entry:
-; CHECK: call double @dx.op.waveReadLaneAt.f64(i32 117, double %expr.i0, i32 %idx)
-; CHECK: call double @dx.op.waveReadLaneAt.f64(i32 117, double %expr.i1, i32 %idx)
-; CHECK: call double @dx.op.waveReadLaneAt.f64(i32 117, double %expr.i2, i32 %idx)
-; CHECK: call double @dx.op.waveReadLaneAt.f64(i32 117, double %expr.i3, i32 %idx)
+; CHECK: call double @dx.op.waveReadLaneAt.f64(i32 117, double %expr.i0, i32 %idx){{$}}
+; CHECK: call double @dx.op.waveReadLaneAt.f64(i32 117, double %expr.i1, i32 %idx){{$}}
+; CHECK: call double @dx.op.waveReadLaneAt.f64(i32 117, double %expr.i2, i32 %idx){{$}}
+; CHECK: call double @dx.op.waveReadLaneAt.f64(i32 117, double %expr.i3, i32 %idx){{$}}
   %ret = call <4 x double> @llvm.dx.wave.readlane(<4 x double> %expr, i32 %idx)
   ret <4 x double> %ret
 }
diff --git a/llvm/test/CodeGen/DirectX/WaveReadLaneAt.ll b/llvm/test/CodeGen/DirectX/WaveReadLaneAt.ll
index 0024ba66c0cad..548117d431ff2 100644
--- a/llvm/test/CodeGen/DirectX/WaveReadLaneAt.ll
+++ b/llvm/test/CodeGen/DirectX/WaveReadLaneAt.ll
@@ -4,53 +4,55 @@
 
 define noundef half @wave_rla_half(half noundef %expr, i32 noundef %idx) {
 entry:
-; CHECK: call half @dx.op.waveReadLaneAt.f16(i32 117, half %expr, i32 %idx)
+; CHECK: call half @dx.op.waveReadLaneAt.f16(i32 117, half %expr, i32 %idx){{$}}
   %ret = call half @llvm.dx.wave.readlane.f16(half %expr, i32 %idx)
   ret half %ret
 }
 
 define noundef float @wave_rla_float(float noundef %expr, i32 noundef %idx) {
 entry:
-; CHECK: call float @dx.op.waveReadLaneAt.f32(i32 117, float %expr, i32 %idx)
+; CHECK: call float @dx.op.waveReadLaneAt.f32(i32 117, float %expr, i32 %idx){{$}}
   %ret = call float @llvm.dx.wave.readlane(float %expr, i32 %idx)
   ret float %ret
 }
 
 define noundef double @wave_rla_double(double noundef %expr, i32 noundef %idx) {
 entry:
-; CHECK: call double @dx.op.waveReadLaneAt.f64(i32 117, double %expr, i32 %idx)
+; CHECK: call double @dx.op.waveReadLaneAt.f64(i32 117, double %expr, i32 %idx){{$}}
   %ret = call double @llvm.dx.wave.readlane(double %expr, i32 %idx)
   ret double %ret
 }
 
 define noundef i1 @wave_rla_i1(i1 noundef %expr, i32 noundef %idx) {
 entry:
-; CHECK: call i1 @dx.op.waveReadLaneAt.i1(i32 117, i1 %expr, i32 %idx)
+; CHECK: call i1 @dx.op.waveReadLaneAt.i1(i32 117, i1 %expr, i32 %idx){{$}}
   %ret = call i1 @llvm.dx.wave.readlane.i1(i1 %expr, i32 %idx)
   ret i1 %ret
 }
 
 define noundef i16 @wave_rla_i16(i16 noundef %expr, i32 noundef %idx) {
 entry:
-; CHECK: call i16 @dx.op.waveReadLaneAt.i16(i32 117, i16 %expr, i32 %idx)
+; CHECK: call i16 @dx.op.waveReadLaneAt.i16(i32 117, i16 %expr, i32 %idx){{$}}
   %ret = call i16 @llvm.dx.wave.readlane.i16(i16 %expr, i32 %idx)
   ret i16 %ret
 }
 
 define noundef i32 @wave_rla_i32(i32 noundef %expr, i32 noundef %idx) {
 entry:
-; CHECK: call i32 @dx.op.waveReadLaneAt.i32(i32 117, i32 %expr, i32 %idx)
+; CHECK: call i32 @dx.op.waveReadLaneAt.i32(i32 117, i32 %expr, i32 %idx){{$}}
   %ret = call i32 @llvm.dx.wave.readlane.i32(i32 %expr, i32 %idx)
   ret i32 %ret
 }
 
 define noundef i64 @wave_rla_i64(i64 noundef %expr, i32 noundef %idx) {
 entry:
-; CHECK: call i64 @dx.op.waveReadLaneAt.i64(i32 117, i64 %expr, i32 %idx)
+; CHECK: call i64 @dx.op.waveReadLaneAt.i64(i32 117, i64 %expr, i32 %idx){{$}}
   %ret = call i64 @llvm.dx.wave.readlane.i64(i64 %expr, i32 %idx)
   ret i64 %ret
 }
 
+; CHECK-NOT: attributes {{.*}} memory(none)
+
 declare half @llvm.dx.wave.readlane.f16(half, i32)
 declare float @llvm.dx.wave.readlane.f32(float, i32)
 declare double @llvm.dx.wave.readlane.f64(double, i32)
diff --git a/llvm/test/CodeGen/DirectX/abs.ll b/llvm/test/CodeGen/DirectX/abs.ll
index 34464e9db14cb..500facc959de5 100644
--- a/llvm/test/CodeGen/DirectX/abs.ll
+++ b/llvm/test/CodeGen/DirectX/abs.ll
@@ -8,7 +8,7 @@ define noundef i16 @abs_i16(i16 noundef %a) {
 entry:
 ; CHECK: sub i16 0, %a
 ; EXPCHECK: call i16 @llvm.smax.i16(i16 %a, i16 %{{.*}})
-; DOPCHECK: call i16 @dx.op.binary.i16(i32 37, i16 %a, i16 %{{.*}})
+; DOPCHECK: call i16 @dx.op.binary.i16(i32 37, i16 %a, i16 %{{.*}}) #[[#ATTR:]]
   %elt.abs = call i16 @llvm.abs.i16(i16 %a, i1 false)
   ret i16 %elt.abs
 }
@@ -18,7 +18,7 @@ define noundef i32 @abs_i32(i32 noundef %a) {
 entry:
 ; CHECK: sub i32 0, %a
 ; EXPCHECK: call i32 @llvm.smax.i32(i32 %a, i32 %{{.*}})
-; DOPCHECK: call i32 @dx.op.binary.i32(i32 37, i32 %a, i32 %{{.*}})
+; DOPCHECK: call i32 @dx.op.binary.i32(i32 37, i32 %a, i32 %{{.*}}) #[[#ATTR]]
   %elt.abs = call i32 @llvm.abs.i32(i32 %a, i1 false)
   ret i32 %elt.abs
 }
@@ -28,11 +28,13 @@ define noundef i64 @abs_i64(i64 noundef %a) {
 entry:
 ; CHECK: sub i64 0, %a
 ; EXPCHECK: call i64 @llvm.smax.i64(i64 %a, i64 %{{.*}})
-; DOPCHECK: call i64 @dx.op.binary.i64(i32 37, i64 %a, i64 %{{.*}})
+; DOPCHECK: call i64 @dx.op.binary.i64(i32 37, i64 %a, i64 %{{.*}}) #[[#ATTR]]
   %elt.abs = call i64 @llvm.abs.i64(i64 %a, i1 false)
   ret i64 %elt.abs
 }
 
+; DOPCHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare i16 @llvm.abs.i16(i16, i1 immarg)
 declare i32 @llvm.abs.i32(i32, i1 immarg)
 declare i64 @llvm.abs.i64(i64, i1 immarg)
diff --git a/llvm/test/CodeGen/DirectX/acos.ll b/llvm/test/CodeGen/DirectX/acos.ll
index f4a10eb368ebf..fe8e44610ee65 100644
--- a/llvm/test/CodeGen/DirectX/acos.ll
+++ b/llvm/test/CodeGen/DirectX/acos.ll
@@ -4,14 +4,14 @@
 
 define noundef float @acos_float(float noundef %a) {
 entry:
-; CHECK:call float @dx.op.unary.f32(i32 15, float %{{.*}})
+; CHECK:call float @dx.op.unary.f32(i32 15, float %{{.*}}) #[[#ATTR:]]
   %elt.acos = call float @llvm.acos.f32(float %a)
   ret float %elt.acos
 }
 
 define noundef half @acos_half(half noundef %a) {
 entry:
-; CHECK:call half @dx.op.unary.f16(i32 15, half %{{.*}})
+; CHECK:call half @dx.op.unary.f16(i32 15, half %{{.*}}) #[[#ATTR]]
   %elt.acos = call half @llvm.acos.f16(half %a)
   ret half %elt.acos
 }
@@ -19,13 +19,13 @@ entry:
 define noundef <4 x float> @acos_float4(<4 x float> noundef %a) {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
@@ -34,6 +34,8 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.acos.f16(half)
 declare float @llvm.acos.f32(float)
 declare <4 x float> @llvm.acos.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/asin.ll b/llvm/test/CodeGen/DirectX/asin.ll
index bd948f593c24e..a6ce185d5e3a9 100644
--- a/llvm/test/CodeGen/DirectX/asin.ll
+++ b/llvm/test/CodeGen/DirectX/asin.ll
@@ -4,14 +4,14 @@
 
 define noundef float @asin_float(float noundef %a) {
 entry:
-; CHECK:call float @dx.op.unary.f32(i32 16, float %{{.*}})
+; CHECK:call float @dx.op.unary.f32(i32 16, float %{{.*}}) #[[#ATTR:]]
   %elt.asin = call float @llvm.asin.f32(float %a)
   ret float %elt.asin
 }
 
 define noundef half @asin_half(half noundef %a) {
 entry:
-; CHECK:call half @dx.op.unary.f16(i32 16, half %{{.*}})
+; CHECK:call half @dx.op.unary.f16(i32 16, half %{{.*}}) #[[#ATTR]]
   %elt.asin = call half @llvm.asin.f16(half %a)
   ret half %elt.asin
 }
@@ -19,13 +19,13 @@ entry:
 define noundef <4 x float> @asin_float4(<4 x float> noundef %a) {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
@@ -34,6 +34,8 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.asin.f16(half)
 declare float @llvm.asin.f32(float)
 declare <4 x float> @llvm.asin.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/atan.ll b/llvm/test/CodeGen/DirectX/atan.ll
index 58899ab49bdb8..228ccce03eecc 100644
--- a/llvm/test/CodeGen/DirectX/atan.ll
+++ b/llvm/test/CodeGen/DirectX/atan.ll
@@ -4,14 +4,14 @@
 
 define noundef float @atan_float(float noundef %a) {
 entry:
-; CHECK:call float @dx.op.unary.f32(i32 17, float %{{.*}})
+; CHECK:call float @dx.op.unary.f32(i32 17, float %{{.*}}) #[[#ATTR:]]
   %elt.atan = call float @llvm.atan.f32(float %a)
   ret float %elt.atan
 }
 
 define noundef half @atan_half(half noundef %a) {
 entry:
-; CHECK:call half @dx.op.unary.f16(i32 17, half %{{.*}})
+; CHECK:call half @dx.op.unary.f16(i32 17, half %{{.*}}) #[[#ATTR]]
   %elt.atan = call half @llvm.atan.f16(half %a)
   ret half %elt.atan
 }
@@ -19,13 +19,13 @@ entry:
 define noundef <4 x float> @atan_float4(<4 x float> noundef %a) {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
@@ -34,6 +34,8 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.atan.f16(half)
 declare float @llvm.atan.f32(float)
 declare <4 x float> @llvm.atan.v4f32(<4 x float>) 
diff --git a/llvm/test/CodeGen/DirectX/bufferUpdateCounter.ll b/llvm/test/CodeGen/DirectX/bufferUpdateCounter.ll
index 57a47d0a39a7c..d938cb193ef69 100644
--- a/llvm/test/CodeGen/DirectX/bufferUpdateCounter.ll
+++ b/llvm/test/CodeGen/DirectX/bufferUpdateCounter.ll
@@ -11,7 +11,7 @@ define void @update_counter_decrement_vector() {
           i32 0, i32 0, i32 1, i32 0, i1 false)
 
  ; CHECK-NEXT: [[BUFFANOT:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
- ; CHECK-NEXT: [[REG:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[BUFFANOT]], i8 -1)
+ ; CHECK-NEXT: [[REG:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[BUFFANOT]], i8 -1){{$}}
   %1 = call i32 @llvm.dx.resource.updatecounter(target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i8 -1)
   ret void
 }
@@ -23,7 +23,7 @@ define void @update_counter_increment_vector() {
       @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v4f32_0_0_0(
           i32 0, i32 0, i32 1, i32 0, i1 false)
   ; CHECK-NEXT: [[BUFFANOT:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
-  ; CHECK-NEXT: [[REG:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[BUFFANOT]], i8 1)
+  ; CHECK-NEXT: [[REG:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[BUFFANOT]], i8 1){{$}}
   %1 = call i32 @llvm.dx.resource.updatecounter(target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i8 1)
   ret void
 }
@@ -35,7 +35,7 @@ define void @update_counter_decrement_scalar() {
       @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_0_0t(
           i32 1, i32 8, i32 1, i32 0, i1 false)
   ; CHECK-NEXT: [[BUFFANOT:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
-  ; CHECK-NEXT: [[REG:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[BUFFANOT]], i8 -1)
+  ; CHECK-NEXT: [[REG:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[BUFFANOT]], i8 -1){{$}}
   %1 = call i32 @llvm.dx.resource.updatecounter(target("dx.RawBuffer", i8, 0, 0) %buffer, i8 -1)
   ret void
 }
diff --git a/llvm/test/CodeGen/DirectX/ceil.ll b/llvm/test/CodeGen/DirectX/ceil.ll
index bd6e747c2fbf5..73ea2476b028f 100644
--- a/llvm/test/CodeGen/DirectX/ceil.ll
+++ b/llvm/test/CodeGen/DirectX/ceil.ll
@@ -4,14 +4,14 @@
 
 define noundef float @ceil_float(float noundef %a) {
 entry:
-; CHECK:call float @dx.op.unary.f32(i32 28, float %{{.*}})
+; CHECK:call float @dx.op.unary.f32(i32 28, float %{{.*}}) #[[#ATTR:]]
   %elt.ceil = call float @llvm.ceil.f32(float %a)
   ret float %elt.ceil
 }
 
 define noundef half @ceil_half(half noundef %a) {
 entry:
-; CHECK:call half @dx.op.unary.f16(i32 28, half %{{.*}})
+; CHECK:call half @dx.op.unary.f16(i32 28, half %{{.*}}) #[[#ATTR]]
   %elt.ceil = call half @llvm.ceil.f16(half %a)
   ret half %elt.ceil
 }
@@ -19,13 +19,13 @@ entry:
 define noundef <4 x float> @ceil_float4(<4 x float> noundef %a) {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
@@ -34,6 +34,8 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.ceil.f16(half)
 declare float @llvm.ceil.f32(float)
 declare <4 x float> @llvm.ceil.v4f32(<4 x float>) 
diff --git a/llvm/test/CodeGen/DirectX/comput_ids.ll b/llvm/test/CodeGen/DirectX/comput_ids.ll
index 976b3ea5c6ecd..b1b6cf813b598 100644
--- a/llvm/test/CodeGen/DirectX/comput_ids.ll
+++ b/llvm/test/CodeGen/DirectX/comput_ids.ll
@@ -9,7 +9,7 @@ target triple = "dxil-pc-shadermodel6.7-compute"
 ; Function Attrs: noinline nounwind optnone
 define i32 @test_thread_id(i32 %a) #0 {
 entry:
-; CHECK:call i32 @dx.op.threadId.i32(i32 93, i32 %{{.*}})
+; CHECK:call i32 @dx.op.threadId.i32(i32 93, i32 %{{.*}}) #[[#ATTR:]]
   %0 = call i32 @llvm.dx.thread.id(i32 %a)
   ret i32 %0
 }
@@ -18,7 +18,7 @@ entry:
 ; Function Attrs: noinline nounwind optnone
 define i32 @test_group_id(i32 %a) #0 {
 entry:
-; CHECK: call i32 @dx.op.groupId.i32(i32 94, i32 %{{.*}})
+; CHECK: call i32 @dx.op.groupId.i32(i32 94, i32 %{{.*}}) #[[#ATTR]]
   %0 = call i32 @llvm.dx.group.id(i32 %a)
   ret i32 %0
 }
@@ -27,7 +27,7 @@ entry:
 ; Function Attrs: noinline nounwind optnone
 define i32 @test_thread_id_in_group(i32 %a) #0 {
 entry:
-; CHECK: call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 %{{.*}})
+; CHECK: call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 %{{.*}}) #[[#ATTR]]
   %0 = call i32 @llvm.dx.thread.id.in.group(i32 %a)
   ret i32 %0
 }
@@ -36,11 +36,13 @@ entry:
 ; Function Attrs: noinline nounwind optnone
 define i32 @test_flattened_thread_id_in_group() #0 {
 entry:
-; CHECK: call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 96)
+; CHECK: call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 96) #[[#ATTR]]
   %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
   ret i32 %0
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 ; Function Attrs: nounwind readnone willreturn
 declare i32 @llvm.dx.thread.id(i32) #1
 declare i32 @llvm.dx.group.id(i32) #1
diff --git a/llvm/test/CodeGen/DirectX/cos.ll b/llvm/test/CodeGen/DirectX/cos.ll
index 85f5db25570b9..e86fd8c837c3d 100644
--- a/llvm/test/CodeGen/DirectX/cos.ll
+++ b/llvm/test/CodeGen/DirectX/cos.ll
@@ -4,14 +4,14 @@
 
 define noundef float @cos_float(float noundef %a) #0 {
 entry:
-; CHECK:call float @dx.op.unary.f32(i32 12, float %{{.*}})
+; CHECK:call float @dx.op.unary.f32(i32 12, float %{{.*}}) #[[#ATTR:]]
   %elt.cos = call float @llvm.cos.f32(float %a)
   ret float %elt.cos
 }
 
 define noundef half @cos_half(half noundef %a) #0 {
 entry:
-; CHECK:call half @dx.op.unary.f16(i32 12, half %{{.*}})
+; CHECK:call half @dx.op.unary.f16(i32 12, half %{{.*}}) #[[#ATTR]]
   %elt.cos = call half @llvm.cos.f16(half %a)
   ret half %elt.cos
 }
@@ -19,13 +19,13 @@ entry:
 define noundef <4 x float> @cos_float4(<4 x float> noundef %a) #0 {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
@@ -34,6 +34,8 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.cos.f16(half)
 declare float @llvm.cos.f32(float)
 declare <4 x float> @llvm.cos.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/cosh.ll b/llvm/test/CodeGen/DirectX/cosh.ll
index 670a8a3eae086..b7ae6b63d72be 100644
--- a/llvm/test/CodeGen/DirectX/cosh.ll
+++ b/llvm/test/CodeGen/DirectX/cosh.ll
@@ -4,14 +4,14 @@
 
 define noundef float @cosh_float(float noundef %a) {
 entry:
-; CHECK:call float @dx.op.unary.f32(i32 18, float %{{.*}})
+; CHECK:call float @dx.op.unary.f32(i32 18, float %{{.*}}) #[[#ATTR:]]
   %elt.cosh = call float @llvm.cosh.f32(float %a)
   ret float %elt.cosh
 }
 
 define noundef half @cosh_half(half noundef %a) {
 entry:
-; CHECK:call half @dx.op.unary.f16(i32 18, half %{{.*}})
+; CHECK:call half @dx.op.unary.f16(i32 18, half %{{.*}}) #[[#ATTR]]
   %elt.cosh = call half @llvm.cosh.f16(half %a)
   ret half %elt.cosh
 }
@@ -19,13 +19,13 @@ entry:
 define noundef <4 x float> @cosh_float4(<4 x float> noundef %a) #0 {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
@@ -34,6 +34,8 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.cosh.f16(half)
 declare float @llvm.cosh.f32(float)
 declare <4 x float> @llvm.cosh.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/countbits.ll b/llvm/test/CodeGen/DirectX/countbits.ll
index f03ab9c5e79c3..f1f509ce522dd 100644
--- a/llvm/test/CodeGen/DirectX/countbits.ll
+++ b/llvm/test/CodeGen/DirectX/countbits.ll
@@ -4,7 +4,7 @@
 
 define noundef i16 @test_countbits_short(i16 noundef %a) {
 entry:
-; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i16(i32 31, i16 %{{.*}})
+; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i16(i32 31, i16 %{{.*}}) #[[#ATTR:]]
 ; CHECK-NEXT: [[B:%.*]] = trunc i32 [[A]] to i16
-; CHECK-NEXT ret i16 [[B]]
+; CHECK-NEXT: ret i16 [[B]]
   %elt.ctpop = call i16 @llvm.ctpop.i16(i16 %a)
@@ -13,7 +13,7 @@ entry:
 
 define noundef i32 @test_countbits_short2(i16 noundef %a) {
 entry:
-; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i16(i32 31, i16 %{{.*}})
+; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i16(i32 31, i16 %{{.*}}) #[[#ATTR]]
 ; CHECK-NEXT: ret i32 [[A]]
   %elt.ctpop = call i16 @llvm.ctpop.i16(i16 %a)
   %elt.zext = zext i16 %elt.ctpop to i32
@@ -22,7 +22,7 @@ entry:
 
 define noundef i32 @test_countbits_short3(i16 noundef %a) {
 entry:
-; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i16(i32 31, i16 %{{.*}})
+; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i16(i32 31, i16 %{{.*}}) #[[#ATTR]]
 ; CHECK-NEXT: ret i32 [[A]]
   %elt.ctpop = call i16 @llvm.ctpop.i16(i16 %a)
   %elt.sext = sext i16 %elt.ctpop to i32
@@ -31,7 +31,7 @@ entry:
 
 define noundef i32 @test_countbits_int(i32 noundef %a) {
 entry:
-; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 %{{.*}})
+; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 %{{.*}}) #[[#ATTR]]
 ; CHECK-NEXT: ret i32 [[A]]
   %elt.ctpop = call i32 @llvm.ctpop.i32(i32 %a)
   ret i32 %elt.ctpop
@@ -39,7 +39,7 @@ entry:
 
 define noundef i64 @test_countbits_long(i64 noundef %a) {
 entry:
-; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i64(i32 31, i64 %{{.*}})
+; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i64(i32 31, i64 %{{.*}}) #[[#ATTR]]
 ; CHECK-NEXT: [[B:%.*]] = zext i32 [[A]] to i64
-; CHECK-NEXT ret i64 [[B]]
+; CHECK-NEXT: ret i64 [[B]]
   %elt.ctpop = call i64 @llvm.ctpop.i64(i64 %a)
@@ -48,7 +48,7 @@ entry:
 
 define noundef i32 @test_countbits_long2(i64 noundef %a) {
 entry:
-; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i64(i32 31, i64 %{{.*}})
+; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i64(i32 31, i64 %{{.*}}) #[[#ATTR]]
 ; CHECK-NEXT: ret i32 [[A]]
   %elt.ctpop = call i64 @llvm.ctpop.i64(i64 %a)
   %elt.trunc = trunc i64 %elt.ctpop to i32
@@ -58,13 +58,13 @@ entry:
 define noundef <4 x i32> @countbits_vec4_i32(<4 x i32> noundef %a)  {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x i32> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x i32> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x i32> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x i32> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x i32> poison, i32 [[ie0]], i64 0
   ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie1]], i64 1
   ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie2]], i64 2
@@ -73,6 +73,8 @@ entry:
   ret <4 x i32> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare i16 @llvm.ctpop.i16(i16)
 declare i32 @llvm.ctpop.i32(i32)
 declare i64 @llvm.ctpop.i64(i64)
diff --git a/llvm/test/CodeGen/DirectX/dot4add_i8packed.ll b/llvm/test/CodeGen/DirectX/dot4add_i8packed.ll
index 7df0520505cea..63d2873cb46e2 100644
--- a/llvm/test/CodeGen/DirectX/dot4add_i8packed.ll
+++ b/llvm/test/CodeGen/DirectX/dot4add_i8packed.ll
@@ -2,9 +2,11 @@
 
 define void @main(i32 %a, i32 %b, i32 %c) {
 entry:
-; CHECK: call i32 @dx.op.dot4AddPacked(i32 163, i32 %a, i32 %b, i32 %c)
+; CHECK: call i32 @dx.op.dot4AddPacked(i32 163, i32 %a, i32 %b, i32 %c) #[[#ATTR:]]
   %0 = call i32 @llvm.dx.dot4add.i8packed(i32 %a, i32 %b, i32 %c)
   ret void
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare i32 @llvm.dx.dot4add.i8packed(i32, i32, i32)
diff --git a/llvm/test/CodeGen/DirectX/dot4add_u8packed.ll b/llvm/test/CodeGen/DirectX/dot4add_u8packed.ll
index 3836b4a4bc16c..a9828bafddaab 100644
--- a/llvm/test/CodeGen/DirectX/dot4add_u8packed.ll
+++ b/llvm/test/CodeGen/DirectX/dot4add_u8packed.ll
@@ -2,9 +2,11 @@
 
 define void @main(i32 %a, i32 %b, i32 %c) {
 entry:
-; CHECK: call i32 @dx.op.dot4AddPacked(i32 164, i32 %a, i32 %b, i32 %c)
+; CHECK: call i32 @dx.op.dot4AddPacked(i32 164, i32 %a, i32 %b, i32 %c) #[[#ATTR:]]
   %0 = call i32 @llvm.dx.dot4add.u8packed(i32 %a, i32 %b, i32 %c)
   ret void
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare i32 @llvm.dx.dot4add.u8packed(i32, i32, i32)
diff --git a/llvm/test/CodeGen/DirectX/exp.ll b/llvm/test/CodeGen/DirectX/exp.ll
index c2d9938d27ecd..7a707e36bf9f1 100644
--- a/llvm/test/CodeGen/DirectX/exp.ll
+++ b/llvm/test/CodeGen/DirectX/exp.ll
@@ -4,7 +4,7 @@
 
 ; CHECK-LABEL: exp_float
 ; CHECK: fmul float 0x3FF7154760000000, %{{.*}}
-; CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}})
+; CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) #[[#ATTR:]]
 define noundef float @exp_float(float noundef %a) {
 entry:
   %a.addr = alloca float, align 4
@@ -16,7 +16,7 @@ entry:
 
 ; CHECK-LABEL: exp_half
 ; CHECK: fmul half 0xH3DC5, %{{.*}}
-; CHECK: call half @dx.op.unary.f16(i32 21, half %{{.*}})
+; CHECK: call half @dx.op.unary.f16(i32 21, half %{{.*}}) #[[#ATTR]]
 ; Function Attrs: noinline nounwind optnone
 define noundef half @exp_half(half noundef %a) {
 entry:
@@ -27,5 +27,7 @@ entry:
   ret half %elt.exp
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.exp.f16(half)
 declare float @llvm.exp.f32(float)
diff --git a/llvm/test/CodeGen/DirectX/fdot.ll b/llvm/test/CodeGen/DirectX/fdot.ll
index 78e111c41feef..c6f36087ba91d 100644
--- a/llvm/test/CodeGen/DirectX/fdot.ll
+++ b/llvm/test/CodeGen/DirectX/fdot.ll
@@ -10,7 +10,7 @@ entry:
 ; DOPCHECK: extractelement <2 x half> %a, i32 1
 ; DOPCHECK: extractelement <2 x half> %b, i32 0
 ; DOPCHECK: extractelement <2 x half> %b, i32 1
-; DOPCHECK: call half @dx.op.dot2.f16(i32 54, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
+; DOPCHECK: call half @dx.op.dot2.f16(i32 54, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}) #[[#ATTR:]]
 ; EXPCHECK: call half @llvm.dx.dot2.v2f16(<2 x half> %a, <2 x half> %b)
   %dx.dot = call half @llvm.dx.fdot.v2f16(<2 x half> %a, <2 x half> %b)
   ret half %dx.dot
@@ -25,7 +25,7 @@ entry:
 ; DOPCHECK: extractelement <3 x half> %b, i32 0
 ; DOPCHECK: extractelement <3 x half> %b, i32 1
 ; DOPCHECK: extractelement <3 x half> %b, i32 2
-; DOPCHECK: call half @dx.op.dot3.f16(i32 55, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
+; DOPCHECK: call half @dx.op.dot3.f16(i32 55, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}) #[[#ATTR]]
 ; EXPCHECK: call half @llvm.dx.dot3.v3f16(<3 x half> %a, <3 x half> %b)
   %dx.dot = call half @llvm.dx.fdot.v3f16(<3 x half> %a, <3 x half> %b)
   ret half %dx.dot
@@ -42,7 +42,7 @@ entry:
 ; DOPCHECK: extractelement <4 x half> %b, i32 1
 ; DOPCHECK: extractelement <4 x half> %b, i32 2
 ; DOPCHECK: extractelement <4 x half> %b, i32 3
-; DOPCHECK: call half @dx.op.dot4.f16(i32 56, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
+; DOPCHECK: call half @dx.op.dot4.f16(i32 56, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}) #[[#ATTR]]
 ; EXPCHECK: call half @llvm.dx.dot4.v4f16(<4 x half> %a, <4 x half> %b)
   %dx.dot = call half @llvm.dx.fdot.v4f16(<4 x half> %a, <4 x half> %b)
   ret half %dx.dot
@@ -55,7 +55,7 @@ entry:
 ; DOPCHECK: extractelement <2 x float> %a, i32 1
 ; DOPCHECK: extractelement <2 x float> %b, i32 0
 ; DOPCHECK: extractelement <2 x float> %b, i32 1
-; DOPCHECK: call float @dx.op.dot2.f32(i32 54, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
+; DOPCHECK: call float @dx.op.dot2.f32(i32 54, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}) #[[#ATTR]]
 ; EXPCHECK: call float @llvm.dx.dot2.v2f32(<2 x float> %a, <2 x float> %b)
   %dx.dot = call float @llvm.dx.fdot.v2f32(<2 x float> %a, <2 x float> %b)
   ret float %dx.dot
@@ -70,7 +70,7 @@ entry:
 ; DOPCHECK: extractelement <3 x float> %b, i32 0
 ; DOPCHECK: extractelement <3 x float> %b, i32 1
 ; DOPCHECK: extractelement <3 x float> %b, i32 2
-; DOPCHECK: call float @dx.op.dot3.f32(i32 55, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
+; DOPCHECK: call float @dx.op.dot3.f32(i32 55, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}) #[[#ATTR]]
 ; EXPCHECK: call float @llvm.dx.dot3.v3f32(<3 x float> %a, <3 x float> %b)
   %dx.dot = call float @llvm.dx.fdot.v3f32(<3 x float> %a, <3 x float> %b)
   ret float %dx.dot
@@ -87,12 +87,14 @@ entry:
 ; DOPCHECK: extractelement <4 x float> %b, i32 1
 ; DOPCHECK: extractelement <4 x float> %b, i32 2
 ; DOPCHECK: extractelement <4 x float> %b, i32 3
-; DOPCHECK: call float @dx.op.dot4.f32(i32 56, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
+; DOPCHECK: call float @dx.op.dot4.f32(i32 56, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}) #[[#ATTR]]
 ; EXPCHECK: call float @llvm.dx.dot4.v4f32(<4 x float> %a, <4 x float> %b)
   %dx.dot = call float @llvm.dx.fdot.v4f32(<4 x float> %a, <4 x float> %b)
   ret float %dx.dot
 }
 
+; DOPCHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half  @llvm.dx.fdot.v2f16(<2 x half> , <2 x half> )
 declare half  @llvm.dx.fdot.v3f16(<3 x half> , <3 x half> )
 declare half  @llvm.dx.fdot.v4f16(<4 x half> , <4 x half> )
diff --git a/llvm/test/CodeGen/DirectX/firstbithigh.ll b/llvm/test/CodeGen/DirectX/firstbithigh.ll
index 5584c433fb6f0..794b0f20a0269 100644
--- a/llvm/test/CodeGen/DirectX/firstbithigh.ll
+++ b/llvm/test/CodeGen/DirectX/firstbithigh.ll
@@ -4,42 +4,42 @@
 
 define noundef i32 @test_firstbithigh_ushort(i16 noundef %a) {
 entry:
-; CHECK: call i32 @dx.op.unaryBits.i16(i32 33, i16 %{{.*}})
+; CHECK: call i32 @dx.op.unaryBits.i16(i32 33, i16 %{{.*}}) #[[#ATTR:]]
   %elt.firstbithigh = call i32 @llvm.dx.firstbituhigh.i16(i16 %a)
   ret i32 %elt.firstbithigh
 }
 
 define noundef i32 @test_firstbithigh_short(i16 noundef %a) {
 entry:
-; CHECK: call i32 @dx.op.unaryBits.i16(i32 34, i16 %{{.*}})
+; CHECK: call i32 @dx.op.unaryBits.i16(i32 34, i16 %{{.*}}) #[[#ATTR]]
   %elt.firstbithigh = call i32 @llvm.dx.firstbitshigh.i16(i16 %a)
   ret i32 %elt.firstbithigh
 }
 
 define noundef i32 @test_firstbithigh_uint(i32 noundef %a) {
 entry:
-; CHECK: call i32 @dx.op.unaryBits.i32(i32 33, i32 %{{.*}})
+; CHECK: call i32 @dx.op.unaryBits.i32(i32 33, i32 %{{.*}}) #[[#ATTR]]
   %elt.firstbithigh = call i32 @llvm.dx.firstbituhigh.i32(i32 %a)
   ret i32 %elt.firstbithigh
 }
 
 define noundef i32 @test_firstbithigh_int(i32 noundef %a) {
 entry:
-; CHECK: call i32 @dx.op.unaryBits.i32(i32 34, i32 %{{.*}})
+; CHECK: call i32 @dx.op.unaryBits.i32(i32 34, i32 %{{.*}}) #[[#ATTR]]
   %elt.firstbithigh = call i32 @llvm.dx.firstbitshigh.i32(i32 %a)
   ret i32 %elt.firstbithigh
 }
 
 define noundef i32 @test_firstbithigh_ulong(i64 noundef %a) {
 entry:
-; CHECK: call i32 @dx.op.unaryBits.i64(i32 33, i64 %{{.*}})
+; CHECK: call i32 @dx.op.unaryBits.i64(i32 33, i64 %{{.*}}) #[[#ATTR]]
   %elt.firstbithigh = call i32 @llvm.dx.firstbituhigh.i64(i64 %a)
   ret i32 %elt.firstbithigh
 }
 
 define noundef i32 @test_firstbithigh_long(i64 noundef %a) {
 entry:
-; CHECK: call i32 @dx.op.unaryBits.i64(i32 34, i64 %{{.*}})
+; CHECK: call i32 @dx.op.unaryBits.i64(i32 34, i64 %{{.*}}) #[[#ATTR]]
   %elt.firstbithigh = call i32 @llvm.dx.firstbitshigh.i64(i64 %a)
   ret i32 %elt.firstbithigh
 }
@@ -47,13 +47,13 @@ entry:
 define noundef <4 x i32> @test_firstbituhigh_vec4_i32(<4 x i32> noundef %a)  {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x i32> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unaryBits.i32(i32 33, i32 [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unaryBits.i32(i32 33, i32 [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x i32> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unaryBits.i32(i32 33, i32 [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unaryBits.i32(i32 33, i32 [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x i32> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unaryBits.i32(i32 33, i32 [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unaryBits.i32(i32 33, i32 [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x i32> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unaryBits.i32(i32 33, i32 [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unaryBits.i32(i32 33, i32 [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x i32> poison, i32 [[ie0]], i64 0
   ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie1]], i64 1
   ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie2]], i64 2
@@ -65,13 +65,13 @@ entry:
 define noundef <4 x i32> @test_firstbitshigh_vec4_i32(<4 x i32> noundef %a)  {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x i32> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unaryBits.i32(i32 34, i32 [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unaryBits.i32(i32 34, i32 [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x i32> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unaryBits.i32(i32 34, i32 [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unaryBits.i32(i32 34, i32 [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x i32> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unaryBits.i32(i32 34, i32 [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unaryBits.i32(i32 34, i32 [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x i32> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unaryBits.i32(i32 34, i32 [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unaryBits.i32(i32 34, i32 [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x i32> poison, i32 [[ie0]], i64 0
   ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie1]], i64 1
   ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie2]], i64 2
@@ -80,6 +80,8 @@ entry:
   ret <4 x i32> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare i32 @llvm.dx.firstbituhigh.i16(i16)
 declare i32 @llvm.dx.firstbituhigh.i32(i32)
 declare i32 @llvm.dx.firstbituhigh.i64(i64)
diff --git a/llvm/test/CodeGen/DirectX/floor.ll b/llvm/test/CodeGen/DirectX/floor.ll
index eaab3988c2c70..e82f23628c316 100644
--- a/llvm/test/CodeGen/DirectX/floor.ll
+++ b/llvm/test/CodeGen/DirectX/floor.ll
@@ -4,14 +4,14 @@
 
 define noundef float @floor_float(float noundef %a) {
 entry:
-; CHECK:call float @dx.op.unary.f32(i32 27, float %{{.*}})
+; CHECK:call float @dx.op.unary.f32(i32 27, float %{{.*}}) #[[#ATTR:]]
   %elt.floor = call float @llvm.floor.f32(float %a)
   ret float %elt.floor
 }
 
 define noundef half @floor_half(half noundef %a) {
 entry:
-; CHECK:call half @dx.op.unary.f16(i32 27, half %{{.*}})
+; CHECK:call half @dx.op.unary.f16(i32 27, half %{{.*}}) #[[#ATTR]]
   %elt.floor = call half @llvm.floor.f16(half %a)
   ret half %elt.floor
 }
@@ -19,13 +19,13 @@ entry:
 define noundef <4 x float> @floor_float4(<4 x float> noundef %a) {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
@@ -34,6 +34,8 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.floor.f16(half)
 declare float @llvm.floor.f32(float)
 declare <4 x float> @llvm.floor.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/fmad.ll b/llvm/test/CodeGen/DirectX/fmad.ll
index e1f4e5cd50c4f..868a3dd1503b5 100644
--- a/llvm/test/CodeGen/DirectX/fmad.ll
+++ b/llvm/test/CodeGen/DirectX/fmad.ll
@@ -1,10 +1,11 @@
 ; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
 
-; Make sure dxil operation function calls for round are generated for float and half.
+; Make sure dxil operation function calls for fmad are generated for half, float, and double.
-; CHECK:call half @dx.op.tertiary.f16(i32 46, half %{{.*}}, half %{{.*}}, half %{{.*}})
-; CHECK:call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float %{{.*}})
-; CHECK:call double @dx.op.tertiary.f64(i32 46, double %{{.*}}, double %{{.*}}, double %{{.*}})
+; CHECK:call half @dx.op.tertiary.f16(i32 46, half %{{.*}}, half %{{.*}}, half %{{.*}}) #[[#ATTR:]]
+; CHECK:call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float %{{.*}}) #[[#ATTR]]
+; CHECK:call double @dx.op.tertiary.f64(i32 46, double %{{.*}}, double %{{.*}}, double %{{.*}}) #[[#ATTR]]
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
 
 target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
 target triple = "dxil-pc-shadermodel6.7-library"
diff --git a/llvm/test/CodeGen/DirectX/fmax.ll b/llvm/test/CodeGen/DirectX/fmax.ll
index 05852ee33486d..60dba72836e23 100644
--- a/llvm/test/CodeGen/DirectX/fmax.ll
+++ b/llvm/test/CodeGen/DirectX/fmax.ll
@@ -5,7 +5,7 @@
 ; CHECK-LABEL:test_fmax_half
 define noundef half @test_fmax_half(half noundef %a, half noundef %b) {
 entry:
-; CHECK: call half @dx.op.binary.f16(i32 35, half %{{.*}}, half %{{.*}})
+; CHECK: call half @dx.op.binary.f16(i32 35, half %{{.*}}, half %{{.*}}) #[[#ATTR:]]
   %0 = call half @llvm.maxnum.f16(half %a, half %b)
   ret half %0
 }
@@ -13,7 +13,7 @@ entry:
 ; CHECK-LABEL:test_fmax_float
 define noundef float @test_fmax_float(float noundef %a, float noundef %b) {
 entry:
-; CHECK: call float @dx.op.binary.f32(i32 35, float %{{.*}}, float %{{.*}})
+; CHECK: call float @dx.op.binary.f32(i32 35, float %{{.*}}, float %{{.*}}) #[[#ATTR]]
   %0 = call float @llvm.maxnum.f32(float %a, float %b)
   ret float %0
 }
@@ -21,11 +21,13 @@ entry:
 ; CHECK-LABEL:test_fmax_double
 define noundef double @test_fmax_double(double noundef %a, double noundef %b) {
 entry:
-; CHECK: call double @dx.op.binary.f64(i32 35, double %{{.*}}, double %{{.*}})
+; CHECK: call double @dx.op.binary.f64(i32 35, double %{{.*}}, double %{{.*}}) #[[#ATTR]]
   %0 = call double @llvm.maxnum.f64(double %a, double %b)
   ret double %0
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.maxnum.f16(half, half)
 declare float @llvm.maxnum.f32(float, float)
 declare double @llvm.maxnum.f64(double, double)
diff --git a/llvm/test/CodeGen/DirectX/fmin.ll b/llvm/test/CodeGen/DirectX/fmin.ll
index 1c6c7ca3f2e38..d592b7c26fbfb 100644
--- a/llvm/test/CodeGen/DirectX/fmin.ll
+++ b/llvm/test/CodeGen/DirectX/fmin.ll
@@ -5,7 +5,7 @@
 ; CHECK-LABEL:test_fmin_half
 define noundef half @test_fmin_half(half noundef %a, half noundef %b) {
 entry:
-; CHECK: call half @dx.op.binary.f16(i32 36, half %{{.*}}, half %{{.*}})
+; CHECK: call half @dx.op.binary.f16(i32 36, half %{{.*}}, half %{{.*}}) #[[#ATTR:]]
   %0 = call half @llvm.minnum.f16(half %a, half %b)
   ret half %0
 }
@@ -13,7 +13,7 @@ entry:
 ; CHECK-LABEL:test_fmin_float
 define noundef float @test_fmin_float(float noundef %a, float noundef %b) {
 entry:
-; CHECK: call float @dx.op.binary.f32(i32 36, float %{{.*}}, float %{{.*}})
+; CHECK: call float @dx.op.binary.f32(i32 36, float %{{.*}}, float %{{.*}}) #[[#ATTR]]
   %0 = call float @llvm.minnum.f32(float %a, float %b)
   ret float %0
 }
@@ -21,11 +21,13 @@ entry:
 ; CHECK-LABEL:test_fmin_double
 define noundef double @test_fmin_double(double noundef %a, double noundef %b) {
 entry:
-; CHECK: call double @dx.op.binary.f64(i32 36, double %{{.*}}, double %{{.*}})
+; CHECK: call double @dx.op.binary.f64(i32 36, double %{{.*}}, double %{{.*}}) #[[#ATTR]]
   %0 = call double @llvm.minnum.f64(double %a, double %b)
   ret double %0
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.minnum.f16(half, half)
 declare float @llvm.minnum.f32(float, float)
 declare double @llvm.minnum.f64(double, double)
diff --git a/llvm/test/CodeGen/DirectX/frac.ll b/llvm/test/CodeGen/DirectX/frac.ll
index ef24527ce837b..e9858287b7b53 100644
--- a/llvm/test/CodeGen/DirectX/frac.ll
+++ b/llvm/test/CodeGen/DirectX/frac.ll
@@ -7,7 +7,7 @@ define noundef half @frac_half(half noundef %a) {
 ; CHECK-LABEL: define noundef half @frac_half(
 ; CHECK-SAME: half noundef [[A:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[DX_FRAC1:%.*]] = call half @dx.op.unary.f16(i32 22, half [[A]])
+; CHECK-NEXT:    [[DX_FRAC1:%.*]] = call half @dx.op.unary.f16(i32 22, half [[A]]) #[[#ATTR:]]
 ; CHECK-NEXT:    ret half [[DX_FRAC1]]
 ;
 entry:
@@ -19,7 +19,7 @@ define noundef float @frac_float(float noundef %a) #0 {
 ; CHECK-LABEL: define noundef float @frac_float(
 ; CHECK-SAME: float noundef [[A:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[DX_FRAC1:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A]])
+; CHECK-NEXT:    [[DX_FRAC1:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A]]) #[[#ATTR]]
 ; CHECK-NEXT:    ret float [[DX_FRAC1]]
 ;
 entry:
@@ -32,13 +32,13 @@ define noundef <4 x float> @frac_float4(<4 x float> noundef %a) #0 {
 ; CHECK-SAME: <4 x float> noundef [[A:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <4 x float> [[A]], i64 0
-; CHECK-NEXT:    [[DOTI04:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A_I0]])
+; CHECK-NEXT:    [[DOTI04:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A_I0]]) #[[#ATTR]]
 ; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <4 x float> [[A]], i64 1
-; CHECK-NEXT:    [[DOTI13:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A_I1]])
+; CHECK-NEXT:    [[DOTI13:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A_I1]]) #[[#ATTR]]
 ; CHECK-NEXT:    [[A_I2:%.*]] = extractelement <4 x float> [[A]], i64 2
-; CHECK-NEXT:    [[DOTI22:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A_I2]])
+; CHECK-NEXT:    [[DOTI22:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A_I2]]) #[[#ATTR]]
 ; CHECK-NEXT:    [[A_I3:%.*]] = extractelement <4 x float> [[A]], i64 3
-; CHECK-NEXT:    [[DOTI31:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A_I3]])
+; CHECK-NEXT:    [[DOTI31:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A_I3]]) #[[#ATTR]]
 ; CHECK-NEXT:    [[DOTUPTO0:%.*]] = insertelement <4 x float> poison, float [[DOTI04]], i64 0
 ; CHECK-NEXT:    [[DOTUPTO1:%.*]] = insertelement <4 x float> [[DOTUPTO0]], float [[DOTI13]], i64 1
 ; CHECK-NEXT:    [[DOTUPTO2:%.*]] = insertelement <4 x float> [[DOTUPTO1]], float [[DOTI22]], i64 2
@@ -50,6 +50,8 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half  @llvm.dx.frac.f16(half)
 declare float @llvm.dx.frac.f32(float)
 declare <4 x float> @llvm.dx.frac.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/idot.ll b/llvm/test/CodeGen/DirectX/idot.ll
index 26e7ff395bd16..8a89d5d3a7a9b 100644
--- a/llvm/test/CodeGen/DirectX/idot.ll
+++ b/llvm/test/CodeGen/DirectX/idot.ll
@@ -12,7 +12,7 @@ entry:
 ; CHECK: extractelement <2 x i16> %a, i64 1
 ; CHECK: extractelement <2 x i16> %b, i64 1
 ; EXPCHECK: call i16 @llvm.dx.imad.i16(i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}})
-; DOPCHECK: call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}})
+; DOPCHECK: call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]]
   %dot = call i16 @llvm.dx.sdot.v3i16(<2 x i16> %a, <2 x i16> %b)
   ret i16 %dot
 }
@@ -26,15 +26,15 @@ entry:
 ; CHECK: extractelement <4 x i32> %a, i64 1
 ; CHECK: extractelement <4 x i32> %b, i64 1
 ; EXPCHECK: call i32 @llvm.dx.imad.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-; DOPCHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+; DOPCHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
 ; CHECK: extractelement <4 x i32> %a, i64 2
 ; CHECK: extractelement <4 x i32> %b, i64 2
 ; EXPCHECK: call i32 @llvm.dx.imad.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-; DOPCHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+; DOPCHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
 ; CHECK: extractelement <4 x i32> %a, i64 3
 ; CHECK: extractelement <4 x i32> %b, i64 3
 ; EXPCHECK: call i32 @llvm.dx.imad.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-; DOPCHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+; DOPCHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
   %dot = call i32 @llvm.dx.sdot.v4i32(<4 x i32> %a, <4 x i32> %b)
   ret i32 %dot
 }
@@ -48,11 +48,11 @@ entry:
 ; CHECK: extractelement <3 x i16> %a, i64 1
 ; CHECK: extractelement <3 x i16> %b, i64 1
 ; EXPCHECK: call i16 @llvm.dx.umad.i16(i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}})
-; DOPCHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}})
+; DOPCHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
 ; CHECK: extractelement <3 x i16> %a, i64 2
 ; CHECK: extractelement <3 x i16> %b, i64 2
 ; EXPCHECK: call i16 @llvm.dx.umad.i16(i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}})
-; DOPCHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}})
+; DOPCHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
   %dot = call i16 @llvm.dx.udot.v3i16(<3 x i16> %a, <3 x i16> %b)
   ret i16 %dot
 }
@@ -66,15 +66,15 @@ entry:
 ; CHECK: extractelement <4 x i32> %a, i64 1
 ; CHECK: extractelement <4 x i32> %b, i64 1
 ; EXPCHECK: call i32 @llvm.dx.umad.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-; DOPCHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+; DOPCHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
 ; CHECK: extractelement <4 x i32> %a, i64 2
 ; CHECK: extractelement <4 x i32> %b, i64 2
 ; EXPCHECK: call i32 @llvm.dx.umad.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-; DOPCHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+; DOPCHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
 ; CHECK: extractelement <4 x i32> %a, i64 3
 ; CHECK: extractelement <4 x i32> %b, i64 3
 ; EXPCHECK: call i32 @llvm.dx.umad.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-; DOPCHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+; DOPCHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
   %dot = call i32 @llvm.dx.udot.v4i32(<4 x i32> %a, <4 x i32> %b)
   ret i32 %dot
 }
@@ -88,11 +88,13 @@ entry:
 ; CHECK: extractelement <2 x i64> %a, i64 1
 ; CHECK: extractelement <2 x i64> %b, i64 1
 ; EXPCHECK: call i64 @llvm.dx.umad.i64(i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}})
-; DOPCHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}})
+; DOPCHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
   %dot = call i64 @llvm.dx.udot.v2i64(<2 x i64> %a, <2 x i64> %b)
   ret i64 %dot
 }
 
+; DOPCHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare i16 @llvm.dx.sdot.v2i16(<2 x i16>, <2 x i16>)
 declare i32 @llvm.dx.sdot.v4i32(<4 x i32>, <4 x i32>)
 declare i16 @llvm.dx.udot.v3i32(<3 x i16>, <3 x i16>)
diff --git a/llvm/test/CodeGen/DirectX/imad.ll b/llvm/test/CodeGen/DirectX/imad.ll
index 5b818f86bc7f2..5d9463d658cf5 100644
--- a/llvm/test/CodeGen/DirectX/imad.ll
+++ b/llvm/test/CodeGen/DirectX/imad.ll
@@ -1,9 +1,11 @@
 ; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
 
-; Make sure dxil operation function calls for round are generated for float and half.
+; Make sure dxil operation function calls for imad are generated for i16, i32 and i64.
-; CHECK:call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}})
-; CHECK:call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-; CHECK:call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}})
+; CHECK:call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]]
+; CHECK:call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
+; CHECK:call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
+
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
 
 target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
 target triple = "dxil-pc-shadermodel6.7-library"
diff --git a/llvm/test/CodeGen/DirectX/isinf.ll b/llvm/test/CodeGen/DirectX/isinf.ll
index 03a00c40498d5..2bd83e94b9708 100644
--- a/llvm/test/CodeGen/DirectX/isinf.ll
+++ b/llvm/test/CodeGen/DirectX/isinf.ll
@@ -4,18 +4,19 @@
 
 define noundef i1 @isinf_float(float noundef %a) {
 entry:
-  ; CHECK: call i1 @dx.op.isSpecialFloat.f32(i32 9, float %{{.*}})
+  ; CHECK: call i1 @dx.op.isSpecialFloat.f32(i32 9, float %{{.*}}) #[[#ATTR:]]
   %dx.isinf = call i1 @llvm.dx.isinf.f32(float %a)
   ret i1 %dx.isinf
 }
 
 define noundef i1 @isinf_half(half noundef %a) {
 entry:
-  ; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half %{{.*}})
+  ; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half %{{.*}}) #[[#ATTR]]
   %dx.isinf = call i1 @llvm.dx.isinf.f16(half %a)
   ret i1 %dx.isinf
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
 
 declare i1 @llvm.dx.isinf.f16(half)
 declare i1 @llvm.dx.isinf.f32(float)
diff --git a/llvm/test/CodeGen/DirectX/log.ll b/llvm/test/CodeGen/DirectX/log.ll
index 195713309cd44..d389413761920 100644
--- a/llvm/test/CodeGen/DirectX/log.ll
+++ b/llvm/test/CodeGen/DirectX/log.ll
@@ -5,7 +5,7 @@
 
 define noundef float @log_float(float noundef %a) #0 {
 entry:
-; DOPCHECK: call float @dx.op.unary.f32(i32 23, float %{{.*}})
+; DOPCHECK: call float @dx.op.unary.f32(i32 23, float %{{.*}}) #[[#ATTR:]]
 ; EXPCHECK: call float @llvm.log2.f32(float %a)
 ; CHECK: fmul float 0x3FE62E4300000000, %{{.*}}
   %elt.log = call float @llvm.log.f32(float %a)
@@ -14,12 +14,14 @@ entry:
 
 define noundef half @log_half(half noundef %a) #0 {
 entry:
-; DOPCHECK: call half @dx.op.unary.f16(i32 23, half %{{.*}})
+; DOPCHECK: call half @dx.op.unary.f16(i32 23, half %{{.*}}) #[[#ATTR]]
 ; EXPCHECK: call half @llvm.log2.f16(half %a)
 ; CHECK: fmul half 0xH398C, %{{.*}}
   %elt.log = call half @llvm.log.f16(half %a)
   ret half %elt.log
 }
 
+; DOPCHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.log.f16(half)
 declare float @llvm.log.f32(float)
diff --git a/llvm/test/CodeGen/DirectX/log10.ll b/llvm/test/CodeGen/DirectX/log10.ll
index f3acccce7e451..3f40f80310ce2 100644
--- a/llvm/test/CodeGen/DirectX/log10.ll
+++ b/llvm/test/CodeGen/DirectX/log10.ll
@@ -5,7 +5,7 @@
 
 define noundef float @log10_float(float noundef %a) #0 {
 entry:
-; DOPCHECK: call float @dx.op.unary.f32(i32 23, float %{{.*}})
+; DOPCHECK: call float @dx.op.unary.f32(i32 23, float %{{.*}}) #[[#ATTR:]]
 ; EXPCHECK: call float @llvm.log2.f32(float %a)
 ; CHECK: fmul float 0x3FD3441340000000, %{{.*}}
   %elt.log10 = call float @llvm.log10.f32(float %a)
@@ -14,12 +14,14 @@ entry:
 
 define noundef half @log10_half(half noundef %a) #0 {
 entry:
-; DOPCHECK: call half @dx.op.unary.f16(i32 23, half %{{.*}})
+; DOPCHECK: call half @dx.op.unary.f16(i32 23, half %{{.*}}) #[[#ATTR]]
 ; EXPCHECK: call half @llvm.log2.f16(half %a)
 ; CHECK: fmul half 0xH34D1, %{{.*}}
   %elt.log10 = call half @llvm.log10.f16(half %a)
   ret half %elt.log10
 }
 
+; DOPCHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.log10.f16(half)
 declare float @llvm.log10.f32(float)
diff --git a/llvm/test/CodeGen/DirectX/log2.ll b/llvm/test/CodeGen/DirectX/log2.ll
index d6a7ba0b7dda7..eaf1183a2c810 100644
--- a/llvm/test/CodeGen/DirectX/log2.ll
+++ b/llvm/test/CodeGen/DirectX/log2.ll
@@ -4,17 +4,19 @@
 
 define noundef float @log2_float(float noundef %a) #0 {
 entry:
-; CHECK:call float @dx.op.unary.f32(i32 23, float %{{.*}})
+; CHECK:call float @dx.op.unary.f32(i32 23, float %{{.*}}) #[[#ATTR:]]
   %elt.log2 = call float @llvm.log2.f32(float %a)
   ret float %elt.log2
 }
 
 define noundef half @log2_half(half noundef %a) #0 {
 entry:
-; CHECK:call half @dx.op.unary.f16(i32 23, half %{{.*}})
+; CHECK:call half @dx.op.unary.f16(i32 23, half %{{.*}}) #[[#ATTR]]
   %elt.log2 = call half @llvm.log2.f16(half %a)
   ret half %elt.log2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.log2.f16(half)
 declare float @llvm.log2.f32(float)
diff --git a/llvm/test/CodeGen/DirectX/reversebits.ll b/llvm/test/CodeGen/DirectX/reversebits.ll
index a79b901408cf2..acd00b8d9b8d5 100644
--- a/llvm/test/CodeGen/DirectX/reversebits.ll
+++ b/llvm/test/CodeGen/DirectX/reversebits.ll
@@ -5,7 +5,7 @@
 ; Function Attrs: nounwind
 define noundef i16 @test_bitreverse_short(i16 noundef %a) {
 entry:
-; CHECK:call i16 @dx.op.unary.i16(i32 30, i16 %{{.*}})
+; CHECK:call i16 @dx.op.unary.i16(i32 30, i16 %{{.*}}) #[[#ATTR:]]
   %elt.bitreverse = call i16 @llvm.bitreverse.i16(i16 %a)
   ret i16 %elt.bitreverse
 }
@@ -13,7 +13,7 @@ entry:
 ; Function Attrs: nounwind
 define noundef i32 @test_bitreverse_int(i32 noundef %a) {
 entry:
-; CHECK:call i32 @dx.op.unary.i32(i32 30, i32 %{{.*}})
+; CHECK:call i32 @dx.op.unary.i32(i32 30, i32 %{{.*}}) #[[#ATTR]]
   %elt.bitreverse = call i32 @llvm.bitreverse.i32(i32 %a)
   ret i32 %elt.bitreverse
 }
@@ -21,7 +21,7 @@ entry:
 ; Function Attrs: nounwind
 define noundef i64 @test_bitreverse_long(i64 noundef %a) {
 entry:
-; CHECK:call i64 @dx.op.unary.i64(i32 30, i64 %{{.*}})
+; CHECK:call i64 @dx.op.unary.i64(i32 30, i64 %{{.*}}) #[[#ATTR]]
   %elt.bitreverse = call i64 @llvm.bitreverse.i64(i64 %a)
   ret i64 %elt.bitreverse
 }
@@ -29,13 +29,13 @@ entry:
 define noundef <4 x i32> @bitreverse_int324(<4 x i32> noundef %a) #0 {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x i32> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x i32> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x i32> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x i32> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x i32> poison, i32 [[ie0]], i64 0
   ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie1]], i64 1
   ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie2]], i64 2
@@ -44,6 +44,8 @@ entry:
   ret <4 x i32> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare i16 @llvm.bitreverse.i16(i16)
 declare i32 @llvm.bitreverse.i32(i32)
 declare i64 @llvm.bitreverse.i64(i64)
diff --git a/llvm/test/CodeGen/DirectX/round.ll b/llvm/test/CodeGen/DirectX/round.ll
index b08cbac5f42e9..165a201281b31 100644
--- a/llvm/test/CodeGen/DirectX/round.ll
+++ b/llvm/test/CodeGen/DirectX/round.ll
@@ -5,7 +5,7 @@
 ; CHECK-LABEL: round_half
 define noundef half @round_half(half noundef %a) {
 entry:
-; CHECK: call half @dx.op.unary.f16(i32 26, half %{{.*}})
+; CHECK: call half @dx.op.unary.f16(i32 26, half %{{.*}}) #[[#ATTR:]]
   %elt.roundeven = call half @llvm.roundeven.f16(half %a)
   ret half %elt.roundeven
 }
@@ -13,7 +13,7 @@ entry:
 ; CHECK-LABEL: round_float
 define noundef float @round_float(float noundef %a) {
 entry:
-; CHECK: call float @dx.op.unary.f32(i32 26, float %{{.*}})
+; CHECK: call float @dx.op.unary.f32(i32 26, float %{{.*}}) #[[#ATTR]]
   %elt.roundeven = call float @llvm.roundeven.f32(float %a)
   ret float %elt.roundeven
 }
@@ -21,13 +21,13 @@ entry:
 define noundef <4 x float> @round_float4(<4 x float> noundef %a) #0 {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
@@ -36,6 +36,7 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
 
 declare half @llvm.roundeven.f16(half)
 declare float @llvm.roundeven.f32(float)
diff --git a/llvm/test/CodeGen/DirectX/rsqrt.ll b/llvm/test/CodeGen/DirectX/rsqrt.ll
index 612b6222e7594..f755025201bf4 100644
--- a/llvm/test/CodeGen/DirectX/rsqrt.ll
+++ b/llvm/test/CodeGen/DirectX/rsqrt.ll
@@ -7,7 +7,7 @@
 define noundef float @rsqrt_float(float noundef %a) {
 ; CHECK-SAME: float noundef [[A:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[DX_RSQRT1:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A]])
+; CHECK-NEXT:    [[DX_RSQRT1:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A]]) #[[#ATTR:]]
 ; CHECK-NEXT:    ret float [[DX_RSQRT1]]
 ;
 entry:
@@ -19,7 +19,7 @@ entry:
 define noundef half @rsqrt_half(half noundef %a) {
 ; CHECK-SAME: half noundef [[A:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[DX_RSQRT1:%.*]] = call half @dx.op.unary.f16(i32 25, half [[A]])
+; CHECK-NEXT:    [[DX_RSQRT1:%.*]] = call half @dx.op.unary.f16(i32 25, half [[A]]) #[[#ATTR]]
 ; CHECK-NEXT:    ret half [[DX_RSQRT1]]
 ;
 entry:
@@ -32,13 +32,13 @@ define noundef <4 x float> @rsqrt_float4(<4 x float> noundef %a) #0 {
 ; CHECK-SAME: <4 x float> noundef [[A:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <4 x float> [[A]], i64 0
-; CHECK-NEXT:    [[DOTI04:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A_I0]])
+; CHECK-NEXT:    [[DOTI04:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A_I0]]) #[[#ATTR]]
 ; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <4 x float> [[A]], i64 1
-; CHECK-NEXT:    [[DOTI13:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A_I1]])
+; CHECK-NEXT:    [[DOTI13:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A_I1]]) #[[#ATTR]]
 ; CHECK-NEXT:    [[A_I2:%.*]] = extractelement <4 x float> [[A]], i64 2
-; CHECK-NEXT:    [[DOTI22:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A_I2]])
+; CHECK-NEXT:    [[DOTI22:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A_I2]]) #[[#ATTR]]
 ; CHECK-NEXT:    [[A_I3:%.*]] = extractelement <4 x float> [[A]], i64 3
-; CHECK-NEXT:    [[DOTI31:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A_I3]])
+; CHECK-NEXT:    [[DOTI31:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A_I3]]) #[[#ATTR]]
 ; CHECK-NEXT:    [[DOTUPTO0:%.*]] = insertelement <4 x float> poison, float [[DOTI04]], i64 0
 ; CHECK-NEXT:    [[DOTUPTO1:%.*]] = insertelement <4 x float> [[DOTUPTO0]], float [[DOTI13]], i64 1
 ; CHECK-NEXT:    [[DOTUPTO2:%.*]] = insertelement <4 x float> [[DOTUPTO1]], float [[DOTI22]], i64 2
@@ -50,6 +50,7 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
 
 declare half @llvm.dx.rsqrt.f16(half)
 declare float @llvm.dx.rsqrt.f32(float)
diff --git a/llvm/test/CodeGen/DirectX/saturate.ll b/llvm/test/CodeGen/DirectX/saturate.ll
index 404cab7b665d0..0bb1e55421046 100644
--- a/llvm/test/CodeGen/DirectX/saturate.ll
+++ b/llvm/test/CodeGen/DirectX/saturate.ll
@@ -4,7 +4,7 @@
 ; CHECK-LABEL: test_saturate_half
 define noundef half @test_saturate_half(half noundef %p0) {
 entry:
-  ; CHECK: call half @dx.op.unary.f16(i32 7, half %p0)
+  ; CHECK: call half @dx.op.unary.f16(i32 7, half %p0) #[[#ATTR:]]
   %hlsl.saturate = call half @llvm.dx.saturate.f16(half %p0)
   ; CHECK: ret half
   ret half %hlsl.saturate
@@ -13,7 +13,7 @@ entry:
 ; CHECK-LABEL: test_saturate_float
 define noundef float @test_saturate_float(float noundef %p0) {
 entry:
-  ; CHECK: call float @dx.op.unary.f32(i32 7, float %p0)
+  ; CHECK: call float @dx.op.unary.f32(i32 7, float %p0) #[[#ATTR]]
   %hlsl.saturate = call float @llvm.dx.saturate.f32(float %p0)
   ; CHECK: ret float
   ret float %hlsl.saturate
@@ -22,12 +22,14 @@ entry:
 ; CHECK-LABEL: test_saturate_double
 define noundef double @test_saturate_double(double noundef %p0) {
 entry:
-  ; CHECK: call double @dx.op.unary.f64(i32 7, double %p0)
+  ; CHECK: call double @dx.op.unary.f64(i32 7, double %p0) #[[#ATTR]]
   %hlsl.saturate = call double @llvm.dx.saturate.f64(double %p0)
   ; CHECK: ret double
   ret double %hlsl.saturate
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.dx.saturate.f16(half)
 declare float @llvm.dx.saturate.f32(float)
 declare double @llvm.dx.saturate.f64(double)
diff --git a/llvm/test/CodeGen/DirectX/sin.ll b/llvm/test/CodeGen/DirectX/sin.ll
index ac6b217be80e7..a403e7ce7ad03 100644
--- a/llvm/test/CodeGen/DirectX/sin.ll
+++ b/llvm/test/CodeGen/DirectX/sin.ll
@@ -4,14 +4,14 @@
 
 define noundef float @sin_float(float noundef %a) {
 entry:
-  ; CHECK:call float @dx.op.unary.f32(i32 13, float %{{.*}})
+  ; CHECK:call float @dx.op.unary.f32(i32 13, float %{{.*}}) #[[#ATTR:]]
   %1 = call float @llvm.sin.f32(float %a)
   ret float %1
 }
 
 define noundef half @sin_half(half noundef %a) {
 entry:
-  ; CHECK:call half @dx.op.unary.f16(i32 13, half %{{.*}})
+  ; CHECK:call half @dx.op.unary.f16(i32 13, half %{{.*}}) #[[#ATTR]]
   %1 = call half @llvm.sin.f16(half %a)
   ret half %1
 }
@@ -19,13 +19,13 @@ entry:
 define noundef <4 x float> @sin_float4(<4 x float> noundef %a) {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
@@ -34,6 +34,8 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.sin.f16(half)
 declare float @llvm.sin.f32(float)
 declare <4 x float> @llvm.sin.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/sinh.ll b/llvm/test/CodeGen/DirectX/sinh.ll
index deba726e8d9ad..5cbbdb09e9df0 100644
--- a/llvm/test/CodeGen/DirectX/sinh.ll
+++ b/llvm/test/CodeGen/DirectX/sinh.ll
@@ -4,14 +4,14 @@
 
 define noundef float @sinh_float(float noundef %a) {
 entry:
-; CHECK:call float @dx.op.unary.f32(i32 19, float %{{.*}})
+; CHECK:call float @dx.op.unary.f32(i32 19, float %{{.*}}) #[[#ATTR:]]
   %elt.sinh = call float @llvm.sinh.f32(float %a)
   ret float %elt.sinh
 }
 
 define noundef half @sinh_half(half noundef %a) {
 entry:
-; CHECK:call half @dx.op.unary.f16(i32 19, half %{{.*}})
+; CHECK:call half @dx.op.unary.f16(i32 19, half %{{.*}}) #[[#ATTR]]
   %elt.sinh = call half @llvm.sinh.f16(half %a)
   ret half %elt.sinh
 }
@@ -19,13 +19,13 @@ entry:
 define noundef <4 x float> @sinh_float4(<4 x float> noundef %a) {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
@@ -34,6 +34,8 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.sinh.f16(half)
 declare float @llvm.sinh.f32(float)
 declare <4 x float> @llvm.sinh.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/smax.ll b/llvm/test/CodeGen/DirectX/smax.ll
index bcda51cb0bfba..2165a11b7d606 100644
--- a/llvm/test/CodeGen/DirectX/smax.ll
+++ b/llvm/test/CodeGen/DirectX/smax.ll
@@ -5,7 +5,7 @@
 ; CHECK-LABEL:test_smax_i16
 define noundef i16 @test_smax_i16(i16 noundef %a, i16 noundef %b) {
 entry:
-; CHECK: call i16 @dx.op.binary.i16(i32 37, i16 %{{.*}}, i16 %{{.*}})
+; CHECK: call i16 @dx.op.binary.i16(i32 37, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]]
   %0 = call i16 @llvm.smax.i16(i16 %a, i16 %b)
   ret i16 %0
 }
@@ -13,7 +13,7 @@ entry:
 ; CHECK-LABEL:test_smax_i32
 define noundef i32 @test_smax_i32(i32 noundef %a, i32 noundef %b) {
 entry:
-; CHECK: call i32 @dx.op.binary.i32(i32 37, i32 %{{.*}}, i32 %{{.*}})
+; CHECK: call i32 @dx.op.binary.i32(i32 37, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
   %0 = call i32 @llvm.smax.i32(i32 %a, i32 %b)
   ret i32 %0
 }
@@ -21,11 +21,13 @@ entry:
 ; CHECK-LABEL:test_smax_i64
 define noundef i64 @test_smax_i64(i64 noundef %a, i64 noundef %b) {
 entry:
-; CHECK: call i64 @dx.op.binary.i64(i32 37, i64 %{{.*}}, i64 %{{.*}})
+; CHECK: call i64 @dx.op.binary.i64(i32 37, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
   %0 = call i64 @llvm.smax.i64(i64 %a, i64 %b)
   ret i64 %0
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare i16 @llvm.smax.i16(i16, i16)
 declare i32 @llvm.smax.i32(i32, i32)
 declare i64 @llvm.smax.i64(i64, i64)
diff --git a/llvm/test/CodeGen/DirectX/smin.ll b/llvm/test/CodeGen/DirectX/smin.ll
index 8d4884704df21..afa04532f1509 100644
--- a/llvm/test/CodeGen/DirectX/smin.ll
+++ b/llvm/test/CodeGen/DirectX/smin.ll
@@ -5,7 +5,7 @@
 ; CHECK-LABEL:test_smin_i16
 define noundef i16 @test_smin_i16(i16 noundef %a, i16 noundef %b) {
 entry:
-; CHECK: call i16 @dx.op.binary.i16(i32 38, i16 %{{.*}}, i16 %{{.*}})
+; CHECK: call i16 @dx.op.binary.i16(i32 38, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]]
   %0 = call i16 @llvm.smin.i16(i16 %a, i16 %b)
   ret i16 %0
 }
@@ -13,7 +13,7 @@ entry:
 ; CHECK-LABEL:test_smin_i32
 define noundef i32 @test_smin_i32(i32 noundef %a, i32 noundef %b) {
 entry:
-; CHECK: call i32 @dx.op.binary.i32(i32 38, i32 %{{.*}}, i32 %{{.*}})
+; CHECK: call i32 @dx.op.binary.i32(i32 38, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
   %0 = call i32 @llvm.smin.i32(i32 %a, i32 %b)
   ret i32 %0
 }
@@ -21,11 +21,13 @@ entry:
 ; CHECK-LABEL:test_smin_i64
 define noundef i64 @test_smin_i64(i64 noundef %a, i64 noundef %b) {
 entry:
-; CHECK: call i64 @dx.op.binary.i64(i32 38, i64 %{{.*}}, i64 %{{.*}})
+; CHECK: call i64 @dx.op.binary.i64(i32 38, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
   %0 = call i64 @llvm.smin.i64(i64 %a, i64 %b)
   ret i64 %0
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare i16 @llvm.smin.i16(i16, i16)
 declare i32 @llvm.smin.i32(i32, i32)
 declare i64 @llvm.smin.i64(i64, i64)
diff --git a/llvm/test/CodeGen/DirectX/splitdouble.ll b/llvm/test/CodeGen/DirectX/splitdouble.ll
index 1443ba6269255..97a9575c64633 100644
--- a/llvm/test/CodeGen/DirectX/splitdouble.ll
+++ b/llvm/test/CodeGen/DirectX/splitdouble.ll
@@ -5,7 +5,7 @@ define i32 @test_scalar(double noundef %D) {
 ; CHECK-LABEL: define i32 @test_scalar(
 ; CHECK-SAME: double noundef [[D:%.*]]) {
 ; NOLOWER-NEXT:    [[HLSL_ASUINT_I0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D]])
-; WITHLOWER-NEXT:  [[HLSL_ASUINT_I0:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D]])
+; WITHLOWER-NEXT:  [[HLSL_ASUINT_I0:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D]]) #[[#ATTR:]]
 ; NOLOWER-NEXT:    [[EV1:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I0]], 0
 ; NOLOWER-NEXT:    [[EV2:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I0]], 1
 ; WITHLOWER-NEXT:  [[EV1:%.*]] = extractvalue %dx.types.splitdouble [[HLSL_ASUINT_I0]], 0
@@ -26,10 +26,10 @@ define void @test_vector_double_split_void(<2 x double> noundef %d) {
 ; CHECK-SAME: <2 x double> noundef [[D:%.*]]) {
 ; CHECK-NEXT:      [[D_I0:%.*]] = extractelement <2 x double> [[D]], i64 0
 ; NOLOWER-NEXT:    [[HLSL_ASUINT_I0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I0]])
-; WITHLOWER-NEXT:  [[HLSL_ASUINT_I0:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I0]])
+; WITHLOWER-NEXT:  [[HLSL_ASUINT_I0:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I0]]) #[[#ATTR]]
 ; CHECK-NEXT:      [[D_I1:%.*]] = extractelement <2 x double> [[D]], i64 1
 ; NOLOWER-NEXT:    [[HLSL_ASUINT_I1:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I1]])
-; WITHLOWER-NEXT:  [[HLSL_ASUINT_I1:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I1]])
+; WITHLOWER-NEXT:  [[HLSL_ASUINT_I1:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I1]]) #[[#ATTR]]
 ; CHECK-NEXT:      ret void
 ;
   %hlsl.asuint = call { <2 x i32>, <2 x i32> }  @llvm.dx.splitdouble.v2i32(<2 x double> %d)
@@ -41,13 +41,13 @@ define noundef <3 x i32> @test_vector_double_split(<3 x double> noundef %d) {
 ; CHECK-SAME: <3 x double> noundef [[D:%.*]]) {
 ; CHECK-NEXT:      [[D_I0:%.*]] = extractelement <3 x double> [[D]], i64 0
 ; NOLOWER-NEXT:    [[HLSL_ASUINT_I0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I0]])
-; WITHLOWER-NEXT:  [[HLSL_ASUINT_I0:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I0]])
+; WITHLOWER-NEXT:  [[HLSL_ASUINT_I0:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I0]]) #[[#ATTR]]
 ; CHECK-NEXT:      [[D_I1:%.*]] = extractelement <3 x double> [[D]], i64 1
 ; NOLOWER-NEXT:    [[HLSL_ASUINT_I1:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I1]])
-; WITHLOWER-NEXT:  [[HLSL_ASUINT_I1:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I1]])
+; WITHLOWER-NEXT:  [[HLSL_ASUINT_I1:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I1]]) #[[#ATTR]]
 ; CHECK-NEXT:      [[D_I2:%.*]] = extractelement <3 x double> [[D]], i64 2
 ; NOLOWER-NEXT:    [[HLSL_ASUINT_I2:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I2]])
-; WITHLOWER-NEXT:  [[HLSL_ASUINT_I2:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I2]])
+; WITHLOWER-NEXT:  [[HLSL_ASUINT_I2:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I2]]) #[[#ATTR]]
 ; NOLOWER-NEXT:    [[DOTELEM0:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I0]], 0
 ; WITHLOWER-NEXT:  [[DOTELEM0:%.*]] = extractvalue %dx.types.splitdouble [[HLSL_ASUINT_I0]], 0
 ; NOLOWER-NEXT:    [[DOTELEM01:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I1]], 0
@@ -74,3 +74,5 @@ define noundef <3 x i32> @test_vector_double_split(<3 x double> noundef %d) {
   %3 = add <3 x i32> %1, %2
   ret <3 x i32> %3
 }
+
+; WITHLOWER: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
diff --git a/llvm/test/CodeGen/DirectX/sqrt.ll b/llvm/test/CodeGen/DirectX/sqrt.ll
index e2955b4efa2ec..0bdbb24099aa7 100644
--- a/llvm/test/CodeGen/DirectX/sqrt.ll
+++ b/llvm/test/CodeGen/DirectX/sqrt.ll
@@ -4,14 +4,14 @@
 
 define noundef float @sqrt_float(float noundef %a) {
 entry:
-; CHECK:call float @dx.op.unary.f32(i32 24, float %{{.*}})
+; CHECK:call float @dx.op.unary.f32(i32 24, float %{{.*}}) #[[#ATTR:]]
   %elt.sqrt = call float @llvm.sqrt.f32(float %a)
   ret float %elt.sqrt
 }
 
 define noundef half @sqrt_half(half noundef %a) {
 entry:
-; CHECK:call half @dx.op.unary.f16(i32 24, half %{{.*}})
+; CHECK:call half @dx.op.unary.f16(i32 24, half %{{.*}}) #[[#ATTR]]
   %elt.sqrt = call half @llvm.sqrt.f16(half %a)
   ret half %elt.sqrt
 }
@@ -19,13 +19,13 @@ entry:
 define noundef <4 x float> @sqrt_float4(<4 x float> noundef %a) {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
@@ -34,6 +34,8 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.sqrt.f16(half)
 declare float @llvm.sqrt.f32(float)
 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/tan.ll b/llvm/test/CodeGen/DirectX/tan.ll
index cf6965a95c04e..753198bd11051 100644
--- a/llvm/test/CodeGen/DirectX/tan.ll
+++ b/llvm/test/CodeGen/DirectX/tan.ll
@@ -4,14 +4,14 @@
 
 define noundef float @tan_float(float noundef %a) #0 {
 entry:
-; CHECK:call float @dx.op.unary.f32(i32 14, float %{{.*}})
+; CHECK:call float @dx.op.unary.f32(i32 14, float %{{.*}}) #[[#ATTR:]]
   %elt.tan = call float @llvm.tan.f32(float %a)
   ret float %elt.tan
 }
 
 define noundef half @tan_half(half noundef %a) #0 {
 entry:
-; CHECK:call half @dx.op.unary.f16(i32 14, half %{{.*}})
+; CHECK:call half @dx.op.unary.f16(i32 14, half %{{.*}}) #[[#ATTR]]
   %elt.tan = call half @llvm.tan.f16(half %a)
   ret half %elt.tan
 }
@@ -19,13 +19,13 @@ entry:
 define noundef <4 x float> @tan_float4(<4 x float> noundef %a) #0 {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
@@ -34,6 +34,8 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.tan.f16(half)
 declare float @llvm.tan.f32(float)
 declare <4 x float> @llvm.tan.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/tanh.ll b/llvm/test/CodeGen/DirectX/tanh.ll
index 54ec6f29fa0c3..14387508c2e24 100644
--- a/llvm/test/CodeGen/DirectX/tanh.ll
+++ b/llvm/test/CodeGen/DirectX/tanh.ll
@@ -4,14 +4,14 @@
 
 define noundef float @tan_float(float noundef %a) {
 entry:
-; CHECK:call float @dx.op.unary.f32(i32 20, float %{{.*}})
+; CHECK:call float @dx.op.unary.f32(i32 20, float %{{.*}}) #[[#ATTR:]]
   %elt.tanh = call float @llvm.tanh.f32(float %a)
   ret float %elt.tanh
 }
 
 define noundef half @tan_half(half noundef %a) {
 entry:
-; CHECK:call half @dx.op.unary.f16(i32 20, half %{{.*}})
+; CHECK:call half @dx.op.unary.f16(i32 20, half %{{.*}}) #[[#ATTR]]
   %elt.tanh = call half @llvm.tanh.f16(half %a)
   ret half %elt.tanh
 }
@@ -19,13 +19,13 @@ entry:
 define noundef <4 x float> @tanh_float4(<4 x float> noundef %a) #0 {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
@@ -34,6 +34,8 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.tanh.f16(half)
 declare float @llvm.tanh.f32(float)
 declare <4 x float> @llvm.tanh.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/trunc.ll b/llvm/test/CodeGen/DirectX/trunc.ll
index 6d9c222595c44..c8bdeb0d42dce 100644
--- a/llvm/test/CodeGen/DirectX/trunc.ll
+++ b/llvm/test/CodeGen/DirectX/trunc.ll
@@ -4,14 +4,14 @@
 
 define noundef float @trunc_float(float noundef %a) {
 entry:
-; CHECK:call float @dx.op.unary.f32(i32 29, float %{{.*}})
+; CHECK:call float @dx.op.unary.f32(i32 29, float %{{.*}}) #[[#ATTR:]]
   %elt.trunc = call float @llvm.trunc.f32(float %a)
   ret float %elt.trunc
 }
 
 define noundef half @trunc_half(half noundef %a) {
 entry:
-; CHECK:call half @dx.op.unary.f16(i32 29, half %{{.*}})
+; CHECK:call half @dx.op.unary.f16(i32 29, half %{{.*}}) #[[#ATTR]]
   %elt.trunc = call half @llvm.trunc.f16(half %a)
   ret half %elt.trunc
 }
@@ -19,13 +19,13 @@ entry:
 define noundef <4 x float> @trunc_float4(<4 x float> noundef %a) #0 {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
@@ -34,6 +34,8 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.trunc.f16(half)
 declare float @llvm.trunc.f32(float)
 declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/umad.ll b/llvm/test/CodeGen/DirectX/umad.ll
index 583fdddfe03f3..104d2380af66b 100644
--- a/llvm/test/CodeGen/DirectX/umad.ll
+++ b/llvm/test/CodeGen/DirectX/umad.ll
@@ -1,9 +1,11 @@
 ; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
 
 ; Make sure dxil operation function calls for round are generated for float and half.
-; CHECK:call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}})
-; CHECK:call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-; CHECK:call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}})
+; CHECK:call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]]
+; CHECK:call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
+; CHECK:call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
+
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
 
 target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
 target triple = "dxil-pc-shadermodel6.7-library"
diff --git a/llvm/test/CodeGen/DirectX/umax.ll b/llvm/test/CodeGen/DirectX/umax.ll
index a4bd66ef0bd6c..623ca2715aeb7 100644
--- a/llvm/test/CodeGen/DirectX/umax.ll
+++ b/llvm/test/CodeGen/DirectX/umax.ll
@@ -5,7 +5,7 @@
 ; CHECK-LABEL:test_umax_i16
 define noundef i16 @test_umax_i16(i16 noundef %a, i16 noundef %b) {
 entry:
-; CHECK: call i16 @dx.op.binary.i16(i32 39, i16 %{{.*}}, i16 %{{.*}})
+; CHECK: call i16 @dx.op.binary.i16(i32 39, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]]
   %0 = call i16 @llvm.umax.i16(i16 %a, i16 %b)
   ret i16 %0
 }
@@ -13,7 +13,7 @@ entry:
 ; CHECK-LABEL:test_umax_i32
 define noundef i32 @test_umax_i32(i32 noundef %a, i32 noundef %b) {
 entry:
-; CHECK: call i32 @dx.op.binary.i32(i32 39, i32 %{{.*}}, i32 %{{.*}})
+; CHECK: call i32 @dx.op.binary.i32(i32 39, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
   %0 = call i32 @llvm.umax.i32(i32 %a, i32 %b)
   ret i32 %0
 }
@@ -21,11 +21,13 @@ entry:
 ; CHECK-LABEL:test_umax_i64
 define noundef i64 @test_umax_i64(i64 noundef %a, i64 noundef %b) {
 entry:
-; CHECK: call i64 @dx.op.binary.i64(i32 39, i64 %{{.*}}, i64 %{{.*}})
+; CHECK: call i64 @dx.op.binary.i64(i32 39, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
   %0 = call i64 @llvm.umax.i64(i64 %a, i64 %b)
   ret i64 %0
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare i16 @llvm.umax.i16(i16, i16)
 declare i32 @llvm.umax.i32(i32, i32)
 declare i64 @llvm.umax.i64(i64, i64)
diff --git a/llvm/test/CodeGen/DirectX/umin.ll b/llvm/test/CodeGen/DirectX/umin.ll
index a551f8ff3bfa9..7546accc4d3b6 100644
--- a/llvm/test/CodeGen/DirectX/umin.ll
+++ b/llvm/test/CodeGen/DirectX/umin.ll
@@ -5,7 +5,7 @@
 ; CHECK-LABEL:test_umin_i16
 define noundef i16 @test_umin_i16(i16 noundef %a, i16 noundef %b) {
 entry:
-; CHECK: call i16 @dx.op.binary.i16(i32 40, i16 %{{.*}}, i16 %{{.*}})
+; CHECK: call i16 @dx.op.binary.i16(i32 40, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]]
   %0 = call i16 @llvm.umin.i16(i16 %a, i16 %b)
   ret i16 %0
 }
@@ -13,7 +13,7 @@ entry:
 ; CHECK-LABEL:test_umin_i32
 define noundef i32 @test_umin_i32(i32 noundef %a, i32 noundef %b) {
 entry:
-; CHECK: call i32 @dx.op.binary.i32(i32 40, i32 %{{.*}}, i32 %{{.*}})
+; CHECK: call i32 @dx.op.binary.i32(i32 40, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
   %0 = call i32 @llvm.umin.i32(i32 %a, i32 %b)
   ret i32 %0
 }
@@ -21,11 +21,13 @@ entry:
 ; CHECK-LABEL:test_umin_i64
 define noundef i64 @test_umin_i64(i64 noundef %a, i64 noundef %b) {
 entry:
-; CHECK: call i64 @dx.op.binary.i64(i32 40, i64 %{{.*}}, i64 %{{.*}})
+; CHECK: call i64 @dx.op.binary.i64(i32 40, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
   %0 = call i64 @llvm.umin.i64(i64 %a, i64 %b)
   ret i64 %0
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare i16 @llvm.umin.i16(i16, i16)
 declare i32 @llvm.umin.i32(i32, i32)
 declare i64 @llvm.umin.i64(i64, i64)
diff --git a/llvm/test/CodeGen/DirectX/wave_is_first_lane.ll b/llvm/test/CodeGen/DirectX/wave_is_first_lane.ll
index 2265dd8f7348c..6740d4075eee5 100644
--- a/llvm/test/CodeGen/DirectX/wave_is_first_lane.ll
+++ b/llvm/test/CodeGen/DirectX/wave_is_first_lane.ll
@@ -7,6 +7,8 @@ entry:
   ret void
 }
 
+; CHECK-NOT: attributes {{.*}} memory(none)
+
 declare i1 @llvm.dx.wave.is.first.lane() #1
 
 attributes #0 = { convergent norecurse "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
diff --git a/llvm/test/CodeGen/LoongArch/lasx/scalar-to-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/scalar-to-vector.ll
new file mode 100644
index 0000000000000..05fbb746bd9d3
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/scalar-to-vector.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+;; Test scalar_to_vector expansion.
+
+define <32 x i8> @scalar_to_32xi8(i8 %val) {
+; CHECK-LABEL: scalar_to_32xi8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %ret = insertelement <32 x i8> poison, i8 %val, i32 0
+  ret <32 x i8> %ret
+}
+
+define <16 x i16> @scalar_to_16xi16(i16 %val) {
+; CHECK-LABEL: scalar_to_16xi16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %ret = insertelement <16 x i16> poison, i16 %val, i32 0
+  ret <16 x i16> %ret
+}
+
+define <8 x i32> @scalar_to_8xi32(i32 %val) {
+; CHECK-LABEL: scalar_to_8xi32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %ret = insertelement <8 x i32> poison, i32 %val, i32 0
+  ret <8 x i32> %ret
+}
+
+define <4 x i64> @scalar_to_4xi64(i64 %val) {
+; CHECK-LABEL: scalar_to_4xi64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvinsgr2vr.d $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %ret = insertelement <4 x i64> poison, i64 %val, i32 0
+  ret <4 x i64> %ret
+}
+
+define <8 x float> @scalar_to_8xf32(float %val) {
+; CHECK-LABEL: scalar_to_8xf32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $f0 killed $f0 def $xr0
+; CHECK-NEXT:    ret
+  %ret = insertelement <8 x float> poison, float %val, i32 0
+  ret <8 x float> %ret
+}
+
+define <4 x double> @scalar_to_4xf64(double %val) {
+; CHECK-LABEL: scalar_to_4xf64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0
+; CHECK-NEXT:    ret
+  %ret = insertelement <4 x double> poison, double %val, i32 0
+  ret <4 x double> %ret
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
index ba19fe75d7570..eaab6524c5317 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
@@ -369,19 +369,15 @@ entry:
   ret void
 }
 
-;; BUILD_VECTOR through stack.
 ;; If `isShuffleMaskLegal` returns true, it will lead to an infinite loop.
 define void @extract1_i32_zext_insert0_i64_undef(ptr %src, ptr %dst) nounwind {
 ; CHECK-LABEL: extract1_i32_zext_insert0_i64_undef:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi.d $sp, $sp, -16
 ; CHECK-NEXT:    vld $vr0, $a0, 0
 ; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 1
 ; CHECK-NEXT:    bstrpick.d $a0, $a0, 31, 0
-; CHECK-NEXT:    st.d $a0, $sp, 0
-; CHECK-NEXT:    vld $vr0, $sp, 0
+; CHECK-NEXT:    vinsgr2vr.d $vr0, $a0, 0
 ; CHECK-NEXT:    vst $vr0, $a1, 0
-; CHECK-NEXT:    addi.d $sp, $sp, 16
 ; CHECK-NEXT:    ret
   %v = load volatile <4 x i32>, ptr %src
   %e = extractelement <4 x i32> %v, i32 1
diff --git a/llvm/test/CodeGen/LoongArch/lsx/scalar-to-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/scalar-to-vector.ll
new file mode 100644
index 0000000000000..87b68ac591727
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/scalar-to-vector.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+;; Test scalar_to_vector expansion.
+
+define <16 x i8> @scalar_to_16xi8(i8 %val) {
+; CHECK-LABEL: scalar_to_16xi8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %ret = insertelement <16 x i8> poison, i8 %val, i32 0
+  ret <16 x i8> %ret
+}
+
+define <8 x i16> @scalar_to_8xi16(i16 %val) {
+; CHECK-LABEL: scalar_to_8xi16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %ret = insertelement <8 x i16> poison, i16 %val, i32 0
+  ret <8 x i16> %ret
+}
+
+define <4 x i32> @scalar_to_4xi32(i32 %val) {
+; CHECK-LABEL: scalar_to_4xi32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vinsgr2vr.w $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %ret = insertelement <4 x i32> poison, i32 %val, i32 0
+  ret <4 x i32> %ret
+}
+
+define <2 x i64> @scalar_to_2xi64(i64 %val) {
+; CHECK-LABEL: scalar_to_2xi64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %ret = insertelement <2 x i64> poison, i64 %val, i32 0
+  ret <2 x i64> %ret
+}
+
+define <4 x float> @scalar_to_4xf32(float %val) {
+; CHECK-LABEL: scalar_to_4xf32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $f0 killed $f0 def $vr0
+; CHECK-NEXT:    ret
+  %ret = insertelement <4 x float> poison, float %val, i32 0
+  ret <4 x float> %ret
+}
+
+define <2 x double> @scalar_to_2xf64(double %val) {
+; CHECK-LABEL: scalar_to_2xf64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0
+; CHECK-NEXT:    ret
+  %ret = insertelement <2 x double> poison, double %val, i32 0
+  ret <2 x double> %ret
+}
diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
index da26e9846301a..ab76d4e998d2b 100644
--- a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
+++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
@@ -60,6 +60,9 @@
 ; LAXX-NEXT:       Block Frequency Analysis
 ; LAXX-NEXT:       Constant Hoisting
 ; LAXX-NEXT:       Replace intrinsics with calls to vector library
+; LAXX-NEXT:       Lazy Branch Probability Analysis
+; LAXX-NEXT:       Lazy Block Frequency Analysis
+; LAXX-NEXT:       Optimization Remark Emitter
 ; LAXX-NEXT:       Partially inline calls to library functions
 ; LAXX-NEXT:       Instrument function entry/exit with calls to e.g. mcount() (post inlining)
 ; LAXX-NEXT:       Scalarize Masked Memory Intrinsics
diff --git a/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll b/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll
index 3e1b6d8eaadbc..0bdace6b60112 100644
--- a/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll
+++ b/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll
@@ -126,17 +126,14 @@ define void @test_f2(ptr %P, ptr %S) nounwind {
 ;
 ; LA64D-LABEL: test_f2:
 ; LA64D:       # %bb.0:
-; LA64D-NEXT:    addi.d $sp, $sp, -16
 ; LA64D-NEXT:    ld.d $a0, $a0, 0
-; LA64D-NEXT:    st.d $a0, $sp, 0
-; LA64D-NEXT:    vld $vr0, $sp, 0
+; LA64D-NEXT:    vinsgr2vr.d $vr0, $a0, 0
 ; LA64D-NEXT:    lu12i.w $a0, 260096
 ; LA64D-NEXT:    lu52i.d $a0, $a0, 1024
 ; LA64D-NEXT:    vreplgr2vr.d $vr1, $a0
 ; LA64D-NEXT:    vfadd.s $vr0, $vr0, $vr1
 ; LA64D-NEXT:    vpickve2gr.d $a0, $vr0, 0
 ; LA64D-NEXT:    st.d $a0, $a1, 0
-; LA64D-NEXT:    addi.d $sp, $sp, 16
 ; LA64D-NEXT:    ret
   %p = load %f2, ptr %P
   %R = fadd %f2 %p, < float 1.000000e+00, float 2.000000e+00 >
diff --git a/llvm/test/CodeGen/NVPTX/f32-lg2.ll b/llvm/test/CodeGen/NVPTX/f32-lg2.ll
index 13324c6860926..43c521978fed8 100644
--- a/llvm/test/CodeGen/NVPTX/f32-lg2.ll
+++ b/llvm/test/CodeGen/NVPTX/f32-lg2.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -mcpu=sm_20 -mattr=+ptx32 | FileCheck --check-prefixes=CHECK %s
 ; RUN: %if ptxas %{ llc < %s -mcpu=sm_20 -mattr=+ptx32 | %ptxas-verify %}
-target triple = "nvptx-nvidia-cuda"
+target triple = "nvptx64-nvidia-cuda"
 
 declare float @llvm.nvvm.lg2.approx.f(float)
 declare float @llvm.nvvm.lg2.approx.ftz.f(float)
diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll
index 83cb3cde48de1..ac5875c6ab104 100644
--- a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll
+++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll
@@ -1,9 +1,9 @@
 ; Libdevice in recent CUDA versions relies on __CUDA_ARCH reflecting GPU type.
 ; Verify that __nvvm_reflect() is replaced with an appropriate value.
 ;
-; RUN: opt %s -S -passes='nvvm-reflect' -mtriple=nvptx64 -mcpu=sm_20 \
+; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_20 \
 ; RUN:   | FileCheck %s --check-prefixes=COMMON,SM20
-; RUN: opt %s -S -passes='nvvm-reflect' -mtriple=nvptx64 -mcpu=sm_35 \
+; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_35 \
 ; RUN:   | FileCheck %s --check-prefixes=COMMON,SM35
 
 @"$str" = private addrspace(1) constant [12 x i8] c"__CUDA_ARCH\00"
diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect-ocl.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect-ocl.ll
index bf8d6e2cca307..9d383218dce86 100644
--- a/llvm/test/CodeGen/NVPTX/nvvm-reflect-ocl.ll
+++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect-ocl.ll
@@ -1,8 +1,8 @@
 ; Verify that __nvvm_reflect_ocl() is replaced with an appropriate value
 ;
-; RUN: opt %s -S -passes='nvvm-reflect' -mtriple=nvptx64 -mcpu=sm_20 \
+; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_20 \
 ; RUN:   | FileCheck %s --check-prefixes=COMMON,SM20
-; RUN: opt %s -S -passes='nvvm-reflect' -mtriple=nvptx64 -mcpu=sm_35 \
+; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_35 \
 ; RUN:   | FileCheck %s --check-prefixes=COMMON,SM35
 
 @"$str" = private addrspace(4) constant [12 x i8] c"__CUDA_ARCH\00"
diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
index 64325ec772459..3920d75c83ffe 100644
--- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
+++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
@@ -61,6 +61,9 @@
 ; CHECK-NEXT:       Block Frequency Analysis
 ; CHECK-NEXT:       Constant Hoisting
 ; CHECK-NEXT:       Replace intrinsics with calls to vector library
+; CHECK-NEXT:       Lazy Branch Probability Analysis
+; CHECK-NEXT:       Lazy Block Frequency Analysis
+; CHECK-NEXT:       Optimization Remark Emitter
 ; CHECK-NEXT:       Partially inline calls to library functions
 ; CHECK-NEXT:       Instrument function entry/exit with calls to e.g. mcount() (post inlining)
 ; CHECK-NEXT:       Scalarize Masked Memory Intrinsics
diff --git a/llvm/test/CodeGen/PowerPC/vector-reduce-fadd.ll b/llvm/test/CodeGen/PowerPC/vector-reduce-fadd.ll
index 4a036a7868c1a..95ff0d9a3a9c6 100644
--- a/llvm/test/CodeGen/PowerPC/vector-reduce-fadd.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-reduce-fadd.ll
@@ -3628,15 +3628,15 @@ define dso_local ppc_fp128 @v2ppcf128_fast(<2 x ppc_fp128> %a) local_unnamed_add
 ; PWR9LE-LABEL: v2ppcf128_fast:
 ; PWR9LE:       # %bb.0: # %entry
 ; PWR9LE-NEXT:    mflr r0
-; PWR9LE-NEXT:    stdu r1, -64(r1)
-; PWR9LE-NEXT:    std r0, 80(r1)
+; PWR9LE-NEXT:    stdu r1, -48(r1)
+; PWR9LE-NEXT:    std r0, 64(r1)
 ; PWR9LE-NEXT:    bl __gcc_qadd
 ; PWR9LE-NEXT:    nop
 ; PWR9LE-NEXT:    stfd f2, 40(r1)
 ; PWR9LE-NEXT:    stfd f1, 32(r1)
 ; PWR9LE-NEXT:    lxv vs1, 32(r1)
 ; PWR9LE-NEXT:    xxswapd vs2, vs1
-; PWR9LE-NEXT:    addi r1, r1, 64
+; PWR9LE-NEXT:    addi r1, r1, 48
 ; PWR9LE-NEXT:    ld r0, 16(r1)
 ; PWR9LE-NEXT:    mtlr r0
 ; PWR9LE-NEXT:    blr
@@ -3644,15 +3644,15 @@ define dso_local ppc_fp128 @v2ppcf128_fast(<2 x ppc_fp128> %a) local_unnamed_add
 ; PWR9BE-LABEL: v2ppcf128_fast:
 ; PWR9BE:       # %bb.0: # %entry
 ; PWR9BE-NEXT:    mflr r0
-; PWR9BE-NEXT:    stdu r1, -144(r1)
-; PWR9BE-NEXT:    std r0, 160(r1)
+; PWR9BE-NEXT:    stdu r1, -128(r1)
+; PWR9BE-NEXT:    std r0, 144(r1)
 ; PWR9BE-NEXT:    bl __gcc_qadd
 ; PWR9BE-NEXT:    nop
 ; PWR9BE-NEXT:    stfd f2, 120(r1)
 ; PWR9BE-NEXT:    stfd f1, 112(r1)
 ; PWR9BE-NEXT:    lxv vs1, 112(r1)
 ; PWR9BE-NEXT:    xxswapd vs2, vs1
-; PWR9BE-NEXT:    addi r1, r1, 144
+; PWR9BE-NEXT:    addi r1, r1, 128
 ; PWR9BE-NEXT:    ld r0, 16(r1)
 ; PWR9BE-NEXT:    mtlr r0
 ; PWR9BE-NEXT:    blr
@@ -3661,13 +3661,13 @@ define dso_local ppc_fp128 @v2ppcf128_fast(<2 x ppc_fp128> %a) local_unnamed_add
 ; PWR10LE:       # %bb.0: # %entry
 ; PWR10LE-NEXT:    mflr r0
 ; PWR10LE-NEXT:    std r0, 16(r1)
-; PWR10LE-NEXT:    stdu r1, -64(r1)
+; PWR10LE-NEXT:    stdu r1, -48(r1)
 ; PWR10LE-NEXT:    bl __gcc_qadd@notoc
 ; PWR10LE-NEXT:    stfd f2, 40(r1)
 ; PWR10LE-NEXT:    stfd f1, 32(r1)
 ; PWR10LE-NEXT:    lxv vs1, 32(r1)
 ; PWR10LE-NEXT:    xxswapd vs2, vs1
-; PWR10LE-NEXT:    addi r1, r1, 64
+; PWR10LE-NEXT:    addi r1, r1, 48
 ; PWR10LE-NEXT:    ld r0, 16(r1)
 ; PWR10LE-NEXT:    mtlr r0
 ; PWR10LE-NEXT:    blr
@@ -3676,14 +3676,14 @@ define dso_local ppc_fp128 @v2ppcf128_fast(<2 x ppc_fp128> %a) local_unnamed_add
 ; PWR10BE:       # %bb.0: # %entry
 ; PWR10BE-NEXT:    mflr r0
 ; PWR10BE-NEXT:    std r0, 16(r1)
-; PWR10BE-NEXT:    stdu r1, -144(r1)
+; PWR10BE-NEXT:    stdu r1, -128(r1)
 ; PWR10BE-NEXT:    bl __gcc_qadd
 ; PWR10BE-NEXT:    nop
 ; PWR10BE-NEXT:    stfd f2, 120(r1)
 ; PWR10BE-NEXT:    stfd f1, 112(r1)
 ; PWR10BE-NEXT:    lxv vs1, 112(r1)
 ; PWR10BE-NEXT:    xxswapd vs2, vs1
-; PWR10BE-NEXT:    addi r1, r1, 144
+; PWR10BE-NEXT:    addi r1, r1, 128
 ; PWR10BE-NEXT:    ld r0, 16(r1)
 ; PWR10BE-NEXT:    mtlr r0
 ; PWR10BE-NEXT:    blr
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index b0c756e26985b..668c734612447 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -63,6 +63,9 @@
 ; CHECK-NEXT:       Block Frequency Analysis
 ; CHECK-NEXT:       Constant Hoisting
 ; CHECK-NEXT:       Replace intrinsics with calls to vector library
+; CHECK-NEXT:       Lazy Branch Probability Analysis
+; CHECK-NEXT:       Lazy Block Frequency Analysis
+; CHECK-NEXT:       Optimization Remark Emitter
 ; CHECK-NEXT:       Partially inline calls to library functions
 ; CHECK-NEXT:       Instrument function entry/exit with calls to e.g. mcount() (post inlining)
 ; CHECK-NEXT:       Scalarize Masked Memory Intrinsics
diff --git a/llvm/test/CodeGen/RISCV/rvv/intrinsic-vector-match.ll b/llvm/test/CodeGen/RISCV/rvv/intrinsic-vector-match.ll
index 5d730da09ef83..7d37d91ee21b5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/intrinsic-vector-match.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/intrinsic-vector-match.ll
@@ -143,9 +143,8 @@ define <vscale x 16 x i1> @match_nxv16i8_v16i8(<vscale x 16 x i8> %op1, <16 x i8
 define <16 x i1> @match_v16i8_v1i8(<16 x i8> %op1, <1 x i8> %op2, <16 x i1> %mask) {
 ; CHECK-LABEL: match_v16i8_v1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vrgather.vi v10, v9, 0
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vrgather.vi v10, v9, 0
 ; CHECK-NEXT:    vmseq.vv v8, v8, v10
 ; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
@@ -383,69 +382,63 @@ define <8 x i1> @match_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) {
 define <8 x i1> @match_v8i8_v16i8(<8 x i8> %op1, <16 x i8> %op2, <8 x i1> %mask) {
 ; CHECK-LABEL: match_v8i8_v16i8:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vrgather.vi v10, v9, 1
 ; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT:    vmv.x.s a0, v9
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vslidedown.vi v11, v9, 2
-; CHECK-NEXT:    vmv.x.s a1, v10
-; CHECK-NEXT:    vslidedown.vi v10, v9, 3
-; CHECK-NEXT:    vmv.x.s a2, v11
-; CHECK-NEXT:    vslidedown.vi v11, v9, 4
-; CHECK-NEXT:    vmv.x.s a3, v10
-; CHECK-NEXT:    vslidedown.vi v10, v9, 5
-; CHECK-NEXT:    vmv.x.s a4, v11
-; CHECK-NEXT:    vslidedown.vi v11, v9, 6
-; CHECK-NEXT:    vmv.x.s a5, v10
-; CHECK-NEXT:    vslidedown.vi v10, v9, 7
-; CHECK-NEXT:    vmv.x.s a6, v11
 ; CHECK-NEXT:    vslidedown.vi v11, v9, 8
-; CHECK-NEXT:    vmv.x.s a7, v10
-; CHECK-NEXT:    vslidedown.vi v10, v9, 9
-; CHECK-NEXT:    vmv.x.s t0, v11
+; CHECK-NEXT:    vmv.x.s a0, v11
+; CHECK-NEXT:    vslidedown.vi v11, v9, 9
+; CHECK-NEXT:    vmv.x.s a1, v11
 ; CHECK-NEXT:    vslidedown.vi v11, v9, 10
-; CHECK-NEXT:    vmv.x.s t1, v10
-; CHECK-NEXT:    vslidedown.vi v10, v9, 11
-; CHECK-NEXT:    vmv.x.s t2, v11
+; CHECK-NEXT:    vmv.x.s a2, v11
+; CHECK-NEXT:    vslidedown.vi v11, v9, 11
+; CHECK-NEXT:    vmv.x.s a3, v11
 ; CHECK-NEXT:    vslidedown.vi v11, v9, 12
-; CHECK-NEXT:    vmv.x.s t3, v10
-; CHECK-NEXT:    vslidedown.vi v10, v9, 13
-; CHECK-NEXT:    vmv.x.s t4, v11
+; CHECK-NEXT:    vmv.x.s a4, v11
+; CHECK-NEXT:    vslidedown.vi v11, v9, 13
+; CHECK-NEXT:    vmv.x.s a5, v11
 ; CHECK-NEXT:    vslidedown.vi v11, v9, 14
-; CHECK-NEXT:    vslidedown.vi v9, v9, 15
-; CHECK-NEXT:    vmv.x.s t5, v10
+; CHECK-NEXT:    vmv.x.s a6, v11
+; CHECK-NEXT:    vslidedown.vi v11, v9, 15
+; CHECK-NEXT:    vmv.x.s a7, v11
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vmseq.vx v10, v8, a0
-; CHECK-NEXT:    vmv.x.s a0, v11
-; CHECK-NEXT:    vmseq.vx v11, v8, a1
-; CHECK-NEXT:    vmv.x.s a1, v9
-; CHECK-NEXT:    vmseq.vx v9, v8, a2
+; CHECK-NEXT:    vrgather.vi v11, v9, 0
+; CHECK-NEXT:    vmseq.vv v10, v8, v10
+; CHECK-NEXT:    vmseq.vv v11, v8, v11
+; CHECK-NEXT:    vmor.mm v10, v11, v10
+; CHECK-NEXT:    vrgather.vi v11, v9, 2
+; CHECK-NEXT:    vmseq.vv v11, v8, v11
 ; CHECK-NEXT:    vmor.mm v10, v10, v11
-; CHECK-NEXT:    vmseq.vx v11, v8, a3
+; CHECK-NEXT:    vrgather.vi v11, v9, 3
+; CHECK-NEXT:    vmseq.vv v11, v8, v11
+; CHECK-NEXT:    vmor.mm v10, v10, v11
+; CHECK-NEXT:    vrgather.vi v11, v9, 4
+; CHECK-NEXT:    vmseq.vv v11, v8, v11
+; CHECK-NEXT:    vmor.mm v10, v10, v11
+; CHECK-NEXT:    vrgather.vi v11, v9, 5
+; CHECK-NEXT:    vmseq.vv v11, v8, v11
+; CHECK-NEXT:    vmor.mm v10, v10, v11
+; CHECK-NEXT:    vrgather.vi v11, v9, 6
+; CHECK-NEXT:    vmseq.vv v11, v8, v11
+; CHECK-NEXT:    vmor.mm v10, v10, v11
+; CHECK-NEXT:    vmseq.vx v11, v8, a0
+; CHECK-NEXT:    vrgather.vi v12, v9, 7
+; CHECK-NEXT:    vmseq.vv v9, v8, v12
 ; CHECK-NEXT:    vmor.mm v9, v10, v9
-; CHECK-NEXT:    vmseq.vx v10, v8, a4
+; CHECK-NEXT:    vmseq.vx v10, v8, a1
 ; CHECK-NEXT:    vmor.mm v9, v9, v11
-; CHECK-NEXT:    vmseq.vx v11, v8, a5
+; CHECK-NEXT:    vmseq.vx v11, v8, a2
 ; CHECK-NEXT:    vmor.mm v9, v9, v10
-; CHECK-NEXT:    vmseq.vx v10, v8, a6
+; CHECK-NEXT:    vmseq.vx v10, v8, a3
 ; CHECK-NEXT:    vmor.mm v9, v9, v11
-; CHECK-NEXT:    vmseq.vx v11, v8, a7
+; CHECK-NEXT:    vmseq.vx v11, v8, a4
 ; CHECK-NEXT:    vmor.mm v9, v9, v10
-; CHECK-NEXT:    vmseq.vx v10, v8, t0
+; CHECK-NEXT:    vmseq.vx v10, v8, a5
 ; CHECK-NEXT:    vmor.mm v9, v9, v11
-; CHECK-NEXT:    vmseq.vx v11, v8, t1
+; CHECK-NEXT:    vmseq.vx v11, v8, a6
 ; CHECK-NEXT:    vmor.mm v9, v9, v10
-; CHECK-NEXT:    vmseq.vx v10, v8, t2
 ; CHECK-NEXT:    vmor.mm v9, v9, v11
-; CHECK-NEXT:    vmseq.vx v11, v8, t3
-; CHECK-NEXT:    vmor.mm v9, v9, v10
-; CHECK-NEXT:    vmseq.vx v10, v8, t4
-; CHECK-NEXT:    vmor.mm v9, v9, v11
-; CHECK-NEXT:    vmseq.vx v11, v8, t5
-; CHECK-NEXT:    vmor.mm v9, v9, v10
-; CHECK-NEXT:    vmseq.vx v10, v8, a0
-; CHECK-NEXT:    vmor.mm v9, v9, v11
-; CHECK-NEXT:    vmor.mm v9, v9, v10
-; CHECK-NEXT:    vmseq.vx v8, v8, a1
+; CHECK-NEXT:    vmseq.vx v8, v8, a7
 ; CHECK-NEXT:    vmor.mm v8, v9, v8
 ; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
@@ -456,387 +449,251 @@ define <8 x i1> @match_v8i8_v16i8(<8 x i8> %op1, <16 x i8> %op2, <8 x i1> %mask)
 define <vscale x 16 x i1> @match_nxv16i8_v32i8(<vscale x 16 x i8> %op1, <32 x i8> %op2, <vscale x 16 x i1> %mask) {
 ; RV32-LABEL: match_nxv16i8_v32i8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -64
-; RV32-NEXT:    .cfi_def_cfa_offset 64
-; RV32-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 56(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s1, 52(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s2, 48(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s3, 44(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s4, 40(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s5, 36(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s6, 32(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s7, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s8, 24(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s9, 20(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s10, 16(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s11, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    .cfi_offset s0, -8
-; RV32-NEXT:    .cfi_offset s1, -12
-; RV32-NEXT:    .cfi_offset s2, -16
-; RV32-NEXT:    .cfi_offset s3, -20
-; RV32-NEXT:    .cfi_offset s4, -24
-; RV32-NEXT:    .cfi_offset s5, -28
-; RV32-NEXT:    .cfi_offset s6, -32
-; RV32-NEXT:    .cfi_offset s7, -36
-; RV32-NEXT:    .cfi_offset s8, -40
-; RV32-NEXT:    .cfi_offset s9, -44
-; RV32-NEXT:    .cfi_offset s10, -48
-; RV32-NEXT:    .cfi_offset s11, -52
-; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT:    vmv.x.s a0, v10
-; RV32-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
-; RV32-NEXT:    vslidedown.vi v12, v10, 1
-; RV32-NEXT:    vslidedown.vi v13, v10, 2
-; RV32-NEXT:    vslidedown.vi v14, v10, 3
-; RV32-NEXT:    vslidedown.vi v15, v10, 4
-; RV32-NEXT:    vslidedown.vi v16, v10, 5
-; RV32-NEXT:    vslidedown.vi v17, v10, 6
-; RV32-NEXT:    vslidedown.vi v18, v10, 7
-; RV32-NEXT:    vslidedown.vi v19, v10, 8
-; RV32-NEXT:    vslidedown.vi v20, v10, 9
-; RV32-NEXT:    vslidedown.vi v21, v10, 10
-; RV32-NEXT:    vslidedown.vi v22, v10, 11
-; RV32-NEXT:    vslidedown.vi v23, v10, 12
-; RV32-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
-; RV32-NEXT:    vslidedown.vi v24, v10, 16
-; RV32-NEXT:    vmv.x.s a1, v24
-; RV32-NEXT:    vslidedown.vi v24, v10, 17
-; RV32-NEXT:    vmv.x.s a2, v24
-; RV32-NEXT:    vslidedown.vi v24, v10, 18
-; RV32-NEXT:    vmv.x.s a3, v24
-; RV32-NEXT:    vslidedown.vi v24, v10, 19
-; RV32-NEXT:    vmv.x.s a4, v24
-; RV32-NEXT:    vslidedown.vi v24, v10, 20
-; RV32-NEXT:    vmv.x.s a5, v24
-; RV32-NEXT:    vslidedown.vi v24, v10, 21
-; RV32-NEXT:    vmv.x.s a6, v24
-; RV32-NEXT:    vslidedown.vi v24, v10, 22
-; RV32-NEXT:    vmv.x.s a7, v24
-; RV32-NEXT:    vslidedown.vi v24, v10, 23
-; RV32-NEXT:    vmv.x.s t0, v24
-; RV32-NEXT:    vslidedown.vi v24, v10, 24
-; RV32-NEXT:    vmv.x.s t1, v24
-; RV32-NEXT:    vslidedown.vi v24, v10, 25
-; RV32-NEXT:    vmv.x.s t2, v24
-; RV32-NEXT:    vslidedown.vi v24, v10, 26
-; RV32-NEXT:    vmv.x.s t3, v24
-; RV32-NEXT:    vslidedown.vi v24, v10, 27
-; RV32-NEXT:    vmv.x.s t4, v24
-; RV32-NEXT:    vslidedown.vi v24, v10, 28
-; RV32-NEXT:    vmv.x.s t5, v24
-; RV32-NEXT:    vslidedown.vi v24, v10, 29
-; RV32-NEXT:    vmv.x.s t6, v24
-; RV32-NEXT:    vslidedown.vi v24, v10, 30
-; RV32-NEXT:    vmv.x.s s0, v24
-; RV32-NEXT:    vslidedown.vi v24, v10, 31
-; RV32-NEXT:    vmv.x.s s1, v24
-; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v11, v10, 13
-; RV32-NEXT:    vslidedown.vi v24, v10, 14
-; RV32-NEXT:    vslidedown.vi v10, v10, 15
-; RV32-NEXT:    vmv.x.s s2, v12
-; RV32-NEXT:    vmv.x.s s3, v13
-; RV32-NEXT:    vmv.x.s s4, v14
-; RV32-NEXT:    vmv.x.s s5, v15
-; RV32-NEXT:    vmv.x.s s6, v16
-; RV32-NEXT:    vmv.x.s s7, v17
-; RV32-NEXT:    vmv.x.s s8, v18
-; RV32-NEXT:    vmv.x.s s9, v19
-; RV32-NEXT:    vmv.x.s s10, v20
-; RV32-NEXT:    vmv.x.s s11, v21
-; RV32-NEXT:    vmv.x.s ra, v22
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset s0, -4
 ; RV32-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; RV32-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
-; RV32-NEXT:    vmseq.vx v12, v8, a0
-; RV32-NEXT:    vmv.x.s a0, v23
-; RV32-NEXT:    vmseq.vx v13, v8, s2
-; RV32-NEXT:    vmv.x.s s2, v11
-; RV32-NEXT:    vmseq.vx v11, v8, s3
-; RV32-NEXT:    vmv.x.s s3, v24
-; RV32-NEXT:    vmseq.vx v14, v8, s4
-; RV32-NEXT:    vmv.x.s s4, v10
-; RV32-NEXT:    vmseq.vx v10, v8, s5
-; RV32-NEXT:    vmor.mm v12, v12, v13
-; RV32-NEXT:    vmseq.vx v13, v8, s6
-; RV32-NEXT:    vmor.mm v11, v12, v11
-; RV32-NEXT:    vmseq.vx v12, v8, s7
+; RV32-NEXT:    vrgather.vi v14, v10, 1
+; RV32-NEXT:    vrgather.vi v16, v10, 0
+; RV32-NEXT:    vrgather.vi v18, v10, 2
+; RV32-NEXT:    vrgather.vi v20, v10, 3
+; RV32-NEXT:    vrgather.vi v22, v10, 4
+; RV32-NEXT:    vrgather.vi v24, v10, 5
+; RV32-NEXT:    vrgather.vi v26, v10, 6
+; RV32-NEXT:    vrgather.vi v28, v10, 7
+; RV32-NEXT:    vmseq.vv v12, v8, v14
+; RV32-NEXT:    vmseq.vv v13, v8, v16
+; RV32-NEXT:    vrgather.vi v30, v10, 8
+; RV32-NEXT:    vmseq.vv v14, v8, v18
+; RV32-NEXT:    vmseq.vv v15, v8, v20
+; RV32-NEXT:    vrgather.vi v6, v10, 9
+; RV32-NEXT:    vmseq.vv v16, v8, v22
+; RV32-NEXT:    vmseq.vv v17, v8, v24
+; RV32-NEXT:    vrgather.vi v24, v10, 10
+; RV32-NEXT:    vmseq.vv v18, v8, v26
+; RV32-NEXT:    vmseq.vv v19, v8, v28
+; RV32-NEXT:    vrgather.vi v26, v10, 11
+; RV32-NEXT:    vmseq.vv v20, v8, v30
+; RV32-NEXT:    vmseq.vv v21, v8, v6
+; RV32-NEXT:    vrgather.vi v28, v10, 12
+; RV32-NEXT:    vmseq.vv v22, v8, v24
+; RV32-NEXT:    vmseq.vv v23, v8, v26
+; RV32-NEXT:    vrgather.vi v26, v10, 13
+; RV32-NEXT:    vmseq.vv v25, v8, v28
+; RV32-NEXT:    vmseq.vv v24, v8, v26
+; RV32-NEXT:    vslidedown.vi v26, v10, 16
+; RV32-NEXT:    vmv.x.s a0, v26
+; RV32-NEXT:    vslidedown.vi v26, v10, 17
+; RV32-NEXT:    vmv.x.s a1, v26
+; RV32-NEXT:    vslidedown.vi v26, v10, 18
+; RV32-NEXT:    vmv.x.s a2, v26
+; RV32-NEXT:    vslidedown.vi v26, v10, 19
+; RV32-NEXT:    vmv.x.s a3, v26
+; RV32-NEXT:    vslidedown.vi v26, v10, 20
+; RV32-NEXT:    vmv.x.s a4, v26
+; RV32-NEXT:    vslidedown.vi v26, v10, 21
+; RV32-NEXT:    vmv.x.s a5, v26
+; RV32-NEXT:    vslidedown.vi v26, v10, 22
+; RV32-NEXT:    vmv.x.s a6, v26
+; RV32-NEXT:    vslidedown.vi v26, v10, 23
+; RV32-NEXT:    vmv.x.s a7, v26
+; RV32-NEXT:    vslidedown.vi v26, v10, 24
+; RV32-NEXT:    vmv.x.s t0, v26
+; RV32-NEXT:    vslidedown.vi v26, v10, 25
+; RV32-NEXT:    vmv.x.s t1, v26
+; RV32-NEXT:    vslidedown.vi v26, v10, 26
+; RV32-NEXT:    vmv.x.s t2, v26
+; RV32-NEXT:    vslidedown.vi v26, v10, 27
+; RV32-NEXT:    vmv.x.s t3, v26
+; RV32-NEXT:    vslidedown.vi v26, v10, 28
+; RV32-NEXT:    vmv.x.s t4, v26
+; RV32-NEXT:    vslidedown.vi v26, v10, 29
+; RV32-NEXT:    vmv.x.s t5, v26
+; RV32-NEXT:    vslidedown.vi v26, v10, 30
+; RV32-NEXT:    vmv.x.s t6, v26
+; RV32-NEXT:    vslidedown.vi v26, v10, 31
+; RV32-NEXT:    vmv.x.s s0, v26
+; RV32-NEXT:    vrgather.vi v26, v10, 14
+; RV32-NEXT:    vmseq.vv v28, v8, v26
+; RV32-NEXT:    vrgather.vi v26, v10, 15
+; RV32-NEXT:    vmseq.vv v10, v8, v26
+; RV32-NEXT:    vmor.mm v11, v13, v12
 ; RV32-NEXT:    vmor.mm v11, v11, v14
-; RV32-NEXT:    vmseq.vx v14, v8, s8
+; RV32-NEXT:    vmor.mm v11, v11, v15
+; RV32-NEXT:    vmor.mm v11, v11, v16
+; RV32-NEXT:    vmor.mm v11, v11, v17
+; RV32-NEXT:    vmor.mm v11, v11, v18
+; RV32-NEXT:    vmor.mm v11, v11, v19
+; RV32-NEXT:    vmor.mm v11, v11, v20
+; RV32-NEXT:    vmor.mm v11, v11, v21
+; RV32-NEXT:    vmor.mm v11, v11, v22
+; RV32-NEXT:    vmor.mm v11, v11, v23
+; RV32-NEXT:    vmor.mm v11, v11, v25
+; RV32-NEXT:    vmseq.vx v12, v8, a0
+; RV32-NEXT:    vmor.mm v11, v11, v24
+; RV32-NEXT:    vmseq.vx v13, v8, a1
+; RV32-NEXT:    vmor.mm v11, v11, v28
+; RV32-NEXT:    vmseq.vx v14, v8, a2
 ; RV32-NEXT:    vmor.mm v10, v11, v10
-; RV32-NEXT:    vmseq.vx v11, v8, s9
-; RV32-NEXT:    vmor.mm v10, v10, v13
-; RV32-NEXT:    vmseq.vx v13, v8, s10
+; RV32-NEXT:    vmseq.vx v11, v8, a3
 ; RV32-NEXT:    vmor.mm v10, v10, v12
-; RV32-NEXT:    vmseq.vx v12, v8, s11
-; RV32-NEXT:    vmor.mm v10, v10, v14
-; RV32-NEXT:    vmseq.vx v14, v8, ra
-; RV32-NEXT:    vmor.mm v10, v10, v11
-; RV32-NEXT:    vmseq.vx v11, v8, a0
+; RV32-NEXT:    vmseq.vx v12, v8, a4
 ; RV32-NEXT:    vmor.mm v10, v10, v13
-; RV32-NEXT:    vmseq.vx v13, v8, s2
-; RV32-NEXT:    vmor.mm v10, v10, v12
-; RV32-NEXT:    vmseq.vx v12, v8, s3
+; RV32-NEXT:    vmseq.vx v13, v8, a5
 ; RV32-NEXT:    vmor.mm v10, v10, v14
-; RV32-NEXT:    vmseq.vx v14, v8, s4
+; RV32-NEXT:    vmseq.vx v14, v8, a6
 ; RV32-NEXT:    vmor.mm v10, v10, v11
-; RV32-NEXT:    vmseq.vx v11, v8, a1
-; RV32-NEXT:    vmor.mm v10, v10, v13
-; RV32-NEXT:    vmseq.vx v13, v8, a2
+; RV32-NEXT:    vmseq.vx v11, v8, a7
 ; RV32-NEXT:    vmor.mm v10, v10, v12
-; RV32-NEXT:    vmseq.vx v12, v8, a3
-; RV32-NEXT:    vmor.mm v10, v10, v14
-; RV32-NEXT:    vmseq.vx v14, v8, a4
-; RV32-NEXT:    vmor.mm v10, v10, v11
-; RV32-NEXT:    vmseq.vx v11, v8, a5
+; RV32-NEXT:    vmseq.vx v12, v8, t0
 ; RV32-NEXT:    vmor.mm v10, v10, v13
-; RV32-NEXT:    vmseq.vx v13, v8, a6
-; RV32-NEXT:    vmor.mm v10, v10, v12
-; RV32-NEXT:    vmseq.vx v12, v8, a7
+; RV32-NEXT:    vmseq.vx v13, v8, t1
 ; RV32-NEXT:    vmor.mm v10, v10, v14
-; RV32-NEXT:    vmseq.vx v14, v8, t0
+; RV32-NEXT:    vmseq.vx v14, v8, t2
 ; RV32-NEXT:    vmor.mm v10, v10, v11
-; RV32-NEXT:    vmseq.vx v11, v8, t1
-; RV32-NEXT:    vmor.mm v10, v10, v13
-; RV32-NEXT:    vmseq.vx v13, v8, t2
+; RV32-NEXT:    vmseq.vx v11, v8, t3
 ; RV32-NEXT:    vmor.mm v10, v10, v12
-; RV32-NEXT:    vmseq.vx v12, v8, t3
-; RV32-NEXT:    vmor.mm v10, v10, v14
-; RV32-NEXT:    vmseq.vx v14, v8, t4
-; RV32-NEXT:    vmor.mm v10, v10, v11
-; RV32-NEXT:    vmseq.vx v11, v8, t5
+; RV32-NEXT:    vmseq.vx v12, v8, t4
 ; RV32-NEXT:    vmor.mm v10, v10, v13
-; RV32-NEXT:    vmseq.vx v13, v8, t6
-; RV32-NEXT:    vmor.mm v10, v10, v12
-; RV32-NEXT:    vmseq.vx v12, v8, s0
+; RV32-NEXT:    vmseq.vx v13, v8, t5
 ; RV32-NEXT:    vmor.mm v10, v10, v14
+; RV32-NEXT:    vmseq.vx v14, v8, t6
 ; RV32-NEXT:    vmor.mm v10, v10, v11
-; RV32-NEXT:    vmor.mm v10, v10, v13
 ; RV32-NEXT:    vmor.mm v10, v10, v12
-; RV32-NEXT:    vmseq.vx v11, v8, s1
+; RV32-NEXT:    vmor.mm v10, v10, v13
+; RV32-NEXT:    vmor.mm v10, v10, v14
+; RV32-NEXT:    vmseq.vx v11, v8, s0
 ; RV32-NEXT:    vmor.mm v8, v10, v11
 ; RV32-NEXT:    vmand.mm v0, v8, v0
-; RV32-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s1, 52(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s2, 48(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s3, 44(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s4, 40(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s5, 36(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s6, 32(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s7, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s8, 24(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s9, 20(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s10, 16(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s11, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT:    .cfi_restore ra
+; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    .cfi_restore s0
-; RV32-NEXT:    .cfi_restore s1
-; RV32-NEXT:    .cfi_restore s2
-; RV32-NEXT:    .cfi_restore s3
-; RV32-NEXT:    .cfi_restore s4
-; RV32-NEXT:    .cfi_restore s5
-; RV32-NEXT:    .cfi_restore s6
-; RV32-NEXT:    .cfi_restore s7
-; RV32-NEXT:    .cfi_restore s8
-; RV32-NEXT:    .cfi_restore s9
-; RV32-NEXT:    .cfi_restore s10
-; RV32-NEXT:    .cfi_restore s11
-; RV32-NEXT:    addi sp, sp, 64
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: match_nxv16i8_v32i8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -112
-; RV64-NEXT:    .cfi_def_cfa_offset 112
-; RV64-NEXT:    sd ra, 104(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 96(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s1, 88(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s2, 80(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s3, 72(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s4, 64(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s5, 56(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s6, 48(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s7, 40(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s8, 32(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s9, 24(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s10, 16(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s11, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT:    .cfi_offset ra, -8
-; RV64-NEXT:    .cfi_offset s0, -16
-; RV64-NEXT:    .cfi_offset s1, -24
-; RV64-NEXT:    .cfi_offset s2, -32
-; RV64-NEXT:    .cfi_offset s3, -40
-; RV64-NEXT:    .cfi_offset s4, -48
-; RV64-NEXT:    .cfi_offset s5, -56
-; RV64-NEXT:    .cfi_offset s6, -64
-; RV64-NEXT:    .cfi_offset s7, -72
-; RV64-NEXT:    .cfi_offset s8, -80
-; RV64-NEXT:    .cfi_offset s9, -88
-; RV64-NEXT:    .cfi_offset s10, -96
-; RV64-NEXT:    .cfi_offset s11, -104
-; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT:    vmv.x.s a0, v10
-; RV64-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
-; RV64-NEXT:    vslidedown.vi v12, v10, 1
-; RV64-NEXT:    vslidedown.vi v13, v10, 2
-; RV64-NEXT:    vslidedown.vi v14, v10, 3
-; RV64-NEXT:    vslidedown.vi v15, v10, 4
-; RV64-NEXT:    vslidedown.vi v16, v10, 5
-; RV64-NEXT:    vslidedown.vi v17, v10, 6
-; RV64-NEXT:    vslidedown.vi v18, v10, 7
-; RV64-NEXT:    vslidedown.vi v19, v10, 8
-; RV64-NEXT:    vslidedown.vi v20, v10, 9
-; RV64-NEXT:    vslidedown.vi v21, v10, 10
-; RV64-NEXT:    vslidedown.vi v22, v10, 11
-; RV64-NEXT:    vslidedown.vi v23, v10, 12
-; RV64-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
-; RV64-NEXT:    vslidedown.vi v24, v10, 16
-; RV64-NEXT:    vmv.x.s a1, v24
-; RV64-NEXT:    vslidedown.vi v24, v10, 17
-; RV64-NEXT:    vmv.x.s a2, v24
-; RV64-NEXT:    vslidedown.vi v24, v10, 18
-; RV64-NEXT:    vmv.x.s a3, v24
-; RV64-NEXT:    vslidedown.vi v24, v10, 19
-; RV64-NEXT:    vmv.x.s a4, v24
-; RV64-NEXT:    vslidedown.vi v24, v10, 20
-; RV64-NEXT:    vmv.x.s a5, v24
-; RV64-NEXT:    vslidedown.vi v24, v10, 21
-; RV64-NEXT:    vmv.x.s a6, v24
-; RV64-NEXT:    vslidedown.vi v24, v10, 22
-; RV64-NEXT:    vmv.x.s a7, v24
-; RV64-NEXT:    vslidedown.vi v24, v10, 23
-; RV64-NEXT:    vmv.x.s t0, v24
-; RV64-NEXT:    vslidedown.vi v24, v10, 24
-; RV64-NEXT:    vmv.x.s t1, v24
-; RV64-NEXT:    vslidedown.vi v24, v10, 25
-; RV64-NEXT:    vmv.x.s t2, v24
-; RV64-NEXT:    vslidedown.vi v24, v10, 26
-; RV64-NEXT:    vmv.x.s t3, v24
-; RV64-NEXT:    vslidedown.vi v24, v10, 27
-; RV64-NEXT:    vmv.x.s t4, v24
-; RV64-NEXT:    vslidedown.vi v24, v10, 28
-; RV64-NEXT:    vmv.x.s t5, v24
-; RV64-NEXT:    vslidedown.vi v24, v10, 29
-; RV64-NEXT:    vmv.x.s t6, v24
-; RV64-NEXT:    vslidedown.vi v24, v10, 30
-; RV64-NEXT:    vmv.x.s s0, v24
-; RV64-NEXT:    vslidedown.vi v24, v10, 31
-; RV64-NEXT:    vmv.x.s s1, v24
-; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v11, v10, 13
-; RV64-NEXT:    vslidedown.vi v24, v10, 14
-; RV64-NEXT:    vslidedown.vi v10, v10, 15
-; RV64-NEXT:    vmv.x.s s2, v12
-; RV64-NEXT:    vmv.x.s s3, v13
-; RV64-NEXT:    vmv.x.s s4, v14
-; RV64-NEXT:    vmv.x.s s5, v15
-; RV64-NEXT:    vmv.x.s s6, v16
-; RV64-NEXT:    vmv.x.s s7, v17
-; RV64-NEXT:    vmv.x.s s8, v18
-; RV64-NEXT:    vmv.x.s s9, v19
-; RV64-NEXT:    vmv.x.s s10, v20
-; RV64-NEXT:    vmv.x.s s11, v21
-; RV64-NEXT:    vmv.x.s ra, v22
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset s0, -8
 ; RV64-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; RV64-NEXT:    ld a0, 0(sp) # 8-byte Folded Reload
-; RV64-NEXT:    vmseq.vx v12, v8, a0
-; RV64-NEXT:    vmv.x.s a0, v23
-; RV64-NEXT:    vmseq.vx v13, v8, s2
-; RV64-NEXT:    vmv.x.s s2, v11
-; RV64-NEXT:    vmseq.vx v11, v8, s3
-; RV64-NEXT:    vmv.x.s s3, v24
-; RV64-NEXT:    vmseq.vx v14, v8, s4
-; RV64-NEXT:    vmv.x.s s4, v10
-; RV64-NEXT:    vmseq.vx v10, v8, s5
-; RV64-NEXT:    vmor.mm v12, v12, v13
-; RV64-NEXT:    vmseq.vx v13, v8, s6
-; RV64-NEXT:    vmor.mm v11, v12, v11
-; RV64-NEXT:    vmseq.vx v12, v8, s7
+; RV64-NEXT:    vrgather.vi v14, v10, 1
+; RV64-NEXT:    vrgather.vi v16, v10, 0
+; RV64-NEXT:    vrgather.vi v18, v10, 2
+; RV64-NEXT:    vrgather.vi v20, v10, 3
+; RV64-NEXT:    vrgather.vi v22, v10, 4
+; RV64-NEXT:    vrgather.vi v24, v10, 5
+; RV64-NEXT:    vrgather.vi v26, v10, 6
+; RV64-NEXT:    vrgather.vi v28, v10, 7
+; RV64-NEXT:    vmseq.vv v12, v8, v14
+; RV64-NEXT:    vmseq.vv v13, v8, v16
+; RV64-NEXT:    vrgather.vi v30, v10, 8
+; RV64-NEXT:    vmseq.vv v14, v8, v18
+; RV64-NEXT:    vmseq.vv v15, v8, v20
+; RV64-NEXT:    vrgather.vi v6, v10, 9
+; RV64-NEXT:    vmseq.vv v16, v8, v22
+; RV64-NEXT:    vmseq.vv v17, v8, v24
+; RV64-NEXT:    vrgather.vi v24, v10, 10
+; RV64-NEXT:    vmseq.vv v18, v8, v26
+; RV64-NEXT:    vmseq.vv v19, v8, v28
+; RV64-NEXT:    vrgather.vi v26, v10, 11
+; RV64-NEXT:    vmseq.vv v20, v8, v30
+; RV64-NEXT:    vmseq.vv v21, v8, v6
+; RV64-NEXT:    vrgather.vi v28, v10, 12
+; RV64-NEXT:    vmseq.vv v22, v8, v24
+; RV64-NEXT:    vmseq.vv v23, v8, v26
+; RV64-NEXT:    vrgather.vi v26, v10, 13
+; RV64-NEXT:    vmseq.vv v25, v8, v28
+; RV64-NEXT:    vmseq.vv v24, v8, v26
+; RV64-NEXT:    vslidedown.vi v26, v10, 16
+; RV64-NEXT:    vmv.x.s a0, v26
+; RV64-NEXT:    vslidedown.vi v26, v10, 17
+; RV64-NEXT:    vmv.x.s a1, v26
+; RV64-NEXT:    vslidedown.vi v26, v10, 18
+; RV64-NEXT:    vmv.x.s a2, v26
+; RV64-NEXT:    vslidedown.vi v26, v10, 19
+; RV64-NEXT:    vmv.x.s a3, v26
+; RV64-NEXT:    vslidedown.vi v26, v10, 20
+; RV64-NEXT:    vmv.x.s a4, v26
+; RV64-NEXT:    vslidedown.vi v26, v10, 21
+; RV64-NEXT:    vmv.x.s a5, v26
+; RV64-NEXT:    vslidedown.vi v26, v10, 22
+; RV64-NEXT:    vmv.x.s a6, v26
+; RV64-NEXT:    vslidedown.vi v26, v10, 23
+; RV64-NEXT:    vmv.x.s a7, v26
+; RV64-NEXT:    vslidedown.vi v26, v10, 24
+; RV64-NEXT:    vmv.x.s t0, v26
+; RV64-NEXT:    vslidedown.vi v26, v10, 25
+; RV64-NEXT:    vmv.x.s t1, v26
+; RV64-NEXT:    vslidedown.vi v26, v10, 26
+; RV64-NEXT:    vmv.x.s t2, v26
+; RV64-NEXT:    vslidedown.vi v26, v10, 27
+; RV64-NEXT:    vmv.x.s t3, v26
+; RV64-NEXT:    vslidedown.vi v26, v10, 28
+; RV64-NEXT:    vmv.x.s t4, v26
+; RV64-NEXT:    vslidedown.vi v26, v10, 29
+; RV64-NEXT:    vmv.x.s t5, v26
+; RV64-NEXT:    vslidedown.vi v26, v10, 30
+; RV64-NEXT:    vmv.x.s t6, v26
+; RV64-NEXT:    vslidedown.vi v26, v10, 31
+; RV64-NEXT:    vmv.x.s s0, v26
+; RV64-NEXT:    vrgather.vi v26, v10, 14
+; RV64-NEXT:    vmseq.vv v28, v8, v26
+; RV64-NEXT:    vrgather.vi v26, v10, 15
+; RV64-NEXT:    vmseq.vv v10, v8, v26
+; RV64-NEXT:    vmor.mm v11, v13, v12
 ; RV64-NEXT:    vmor.mm v11, v11, v14
-; RV64-NEXT:    vmseq.vx v14, v8, s8
+; RV64-NEXT:    vmor.mm v11, v11, v15
+; RV64-NEXT:    vmor.mm v11, v11, v16
+; RV64-NEXT:    vmor.mm v11, v11, v17
+; RV64-NEXT:    vmor.mm v11, v11, v18
+; RV64-NEXT:    vmor.mm v11, v11, v19
+; RV64-NEXT:    vmor.mm v11, v11, v20
+; RV64-NEXT:    vmor.mm v11, v11, v21
+; RV64-NEXT:    vmor.mm v11, v11, v22
+; RV64-NEXT:    vmor.mm v11, v11, v23
+; RV64-NEXT:    vmor.mm v11, v11, v25
+; RV64-NEXT:    vmseq.vx v12, v8, a0
+; RV64-NEXT:    vmor.mm v11, v11, v24
+; RV64-NEXT:    vmseq.vx v13, v8, a1
+; RV64-NEXT:    vmor.mm v11, v11, v28
+; RV64-NEXT:    vmseq.vx v14, v8, a2
 ; RV64-NEXT:    vmor.mm v10, v11, v10
-; RV64-NEXT:    vmseq.vx v11, v8, s9
-; RV64-NEXT:    vmor.mm v10, v10, v13
-; RV64-NEXT:    vmseq.vx v13, v8, s10
+; RV64-NEXT:    vmseq.vx v11, v8, a3
 ; RV64-NEXT:    vmor.mm v10, v10, v12
-; RV64-NEXT:    vmseq.vx v12, v8, s11
-; RV64-NEXT:    vmor.mm v10, v10, v14
-; RV64-NEXT:    vmseq.vx v14, v8, ra
-; RV64-NEXT:    vmor.mm v10, v10, v11
-; RV64-NEXT:    vmseq.vx v11, v8, a0
+; RV64-NEXT:    vmseq.vx v12, v8, a4
 ; RV64-NEXT:    vmor.mm v10, v10, v13
-; RV64-NEXT:    vmseq.vx v13, v8, s2
-; RV64-NEXT:    vmor.mm v10, v10, v12
-; RV64-NEXT:    vmseq.vx v12, v8, s3
+; RV64-NEXT:    vmseq.vx v13, v8, a5
 ; RV64-NEXT:    vmor.mm v10, v10, v14
-; RV64-NEXT:    vmseq.vx v14, v8, s4
+; RV64-NEXT:    vmseq.vx v14, v8, a6
 ; RV64-NEXT:    vmor.mm v10, v10, v11
-; RV64-NEXT:    vmseq.vx v11, v8, a1
-; RV64-NEXT:    vmor.mm v10, v10, v13
-; RV64-NEXT:    vmseq.vx v13, v8, a2
+; RV64-NEXT:    vmseq.vx v11, v8, a7
 ; RV64-NEXT:    vmor.mm v10, v10, v12
-; RV64-NEXT:    vmseq.vx v12, v8, a3
-; RV64-NEXT:    vmor.mm v10, v10, v14
-; RV64-NEXT:    vmseq.vx v14, v8, a4
-; RV64-NEXT:    vmor.mm v10, v10, v11
-; RV64-NEXT:    vmseq.vx v11, v8, a5
+; RV64-NEXT:    vmseq.vx v12, v8, t0
 ; RV64-NEXT:    vmor.mm v10, v10, v13
-; RV64-NEXT:    vmseq.vx v13, v8, a6
-; RV64-NEXT:    vmor.mm v10, v10, v12
-; RV64-NEXT:    vmseq.vx v12, v8, a7
+; RV64-NEXT:    vmseq.vx v13, v8, t1
 ; RV64-NEXT:    vmor.mm v10, v10, v14
-; RV64-NEXT:    vmseq.vx v14, v8, t0
+; RV64-NEXT:    vmseq.vx v14, v8, t2
 ; RV64-NEXT:    vmor.mm v10, v10, v11
-; RV64-NEXT:    vmseq.vx v11, v8, t1
-; RV64-NEXT:    vmor.mm v10, v10, v13
-; RV64-NEXT:    vmseq.vx v13, v8, t2
+; RV64-NEXT:    vmseq.vx v11, v8, t3
 ; RV64-NEXT:    vmor.mm v10, v10, v12
-; RV64-NEXT:    vmseq.vx v12, v8, t3
-; RV64-NEXT:    vmor.mm v10, v10, v14
-; RV64-NEXT:    vmseq.vx v14, v8, t4
-; RV64-NEXT:    vmor.mm v10, v10, v11
-; RV64-NEXT:    vmseq.vx v11, v8, t5
+; RV64-NEXT:    vmseq.vx v12, v8, t4
 ; RV64-NEXT:    vmor.mm v10, v10, v13
-; RV64-NEXT:    vmseq.vx v13, v8, t6
-; RV64-NEXT:    vmor.mm v10, v10, v12
-; RV64-NEXT:    vmseq.vx v12, v8, s0
+; RV64-NEXT:    vmseq.vx v13, v8, t5
 ; RV64-NEXT:    vmor.mm v10, v10, v14
+; RV64-NEXT:    vmseq.vx v14, v8, t6
 ; RV64-NEXT:    vmor.mm v10, v10, v11
-; RV64-NEXT:    vmor.mm v10, v10, v13
 ; RV64-NEXT:    vmor.mm v10, v10, v12
-; RV64-NEXT:    vmseq.vx v11, v8, s1
+; RV64-NEXT:    vmor.mm v10, v10, v13
+; RV64-NEXT:    vmor.mm v10, v10, v14
+; RV64-NEXT:    vmseq.vx v11, v8, s0
 ; RV64-NEXT:    vmor.mm v8, v10, v11
 ; RV64-NEXT:    vmand.mm v0, v8, v0
-; RV64-NEXT:    ld ra, 104(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s0, 96(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s1, 88(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s2, 80(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s3, 72(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s4, 64(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s5, 56(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s6, 48(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s7, 40(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s8, 32(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s9, 24(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s10, 16(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s11, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT:    .cfi_restore ra
+; RV64-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    .cfi_restore s0
-; RV64-NEXT:    .cfi_restore s1
-; RV64-NEXT:    .cfi_restore s2
-; RV64-NEXT:    .cfi_restore s3
-; RV64-NEXT:    .cfi_restore s4
-; RV64-NEXT:    .cfi_restore s5
-; RV64-NEXT:    .cfi_restore s6
-; RV64-NEXT:    .cfi_restore s7
-; RV64-NEXT:    .cfi_restore s8
-; RV64-NEXT:    .cfi_restore s9
-; RV64-NEXT:    .cfi_restore s10
-; RV64-NEXT:    .cfi_restore s11
-; RV64-NEXT:    addi sp, sp, 112
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    .cfi_def_cfa_offset 0
 ; RV64-NEXT:    ret
   %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <32 x i8> %op2, <vscale x 16 x i1> %mask)
@@ -846,381 +703,255 @@ define <vscale x 16 x i1> @match_nxv16i8_v32i8(<vscale x 16 x i8> %op1, <32 x i8
 define <16 x i1> @match_v16i8_v32i8(<16 x i8> %op1, <32 x i8> %op2, <16 x i1> %mask) {
 ; RV32-LABEL: match_v16i8_v32i8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -64
-; RV32-NEXT:    .cfi_def_cfa_offset 64
-; RV32-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 56(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s1, 52(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s2, 48(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s3, 44(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s4, 40(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s5, 36(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s6, 32(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s7, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s8, 24(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s9, 20(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s10, 16(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s11, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    .cfi_offset s0, -8
-; RV32-NEXT:    .cfi_offset s1, -12
-; RV32-NEXT:    .cfi_offset s2, -16
-; RV32-NEXT:    .cfi_offset s3, -20
-; RV32-NEXT:    .cfi_offset s4, -24
-; RV32-NEXT:    .cfi_offset s5, -28
-; RV32-NEXT:    .cfi_offset s6, -32
-; RV32-NEXT:    .cfi_offset s7, -36
-; RV32-NEXT:    .cfi_offset s8, -40
-; RV32-NEXT:    .cfi_offset s9, -44
-; RV32-NEXT:    .cfi_offset s10, -48
-; RV32-NEXT:    .cfi_offset s11, -52
-; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT:    vmv.x.s a0, v10
-; RV32-NEXT:    vslidedown.vi v9, v10, 1
-; RV32-NEXT:    vslidedown.vi v12, v10, 2
-; RV32-NEXT:    vslidedown.vi v13, v10, 3
-; RV32-NEXT:    vslidedown.vi v14, v10, 4
-; RV32-NEXT:    vslidedown.vi v15, v10, 5
-; RV32-NEXT:    vslidedown.vi v16, v10, 6
-; RV32-NEXT:    vslidedown.vi v17, v10, 7
-; RV32-NEXT:    vslidedown.vi v18, v10, 8
-; RV32-NEXT:    vslidedown.vi v19, v10, 9
-; RV32-NEXT:    vslidedown.vi v20, v10, 10
-; RV32-NEXT:    vslidedown.vi v21, v10, 11
-; RV32-NEXT:    vslidedown.vi v22, v10, 12
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset s0, -4
+; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT:    vrgather.vi v9, v10, 1
+; RV32-NEXT:    vrgather.vi v12, v10, 0
+; RV32-NEXT:    vrgather.vi v13, v10, 2
+; RV32-NEXT:    vrgather.vi v14, v10, 3
+; RV32-NEXT:    vrgather.vi v15, v10, 4
+; RV32-NEXT:    vrgather.vi v16, v10, 5
+; RV32-NEXT:    vrgather.vi v17, v10, 6
+; RV32-NEXT:    vrgather.vi v18, v10, 7
+; RV32-NEXT:    vrgather.vi v19, v10, 8
+; RV32-NEXT:    vrgather.vi v20, v10, 9
+; RV32-NEXT:    vrgather.vi v21, v10, 10
+; RV32-NEXT:    vrgather.vi v22, v10, 11
+; RV32-NEXT:    vrgather.vi v23, v10, 12
 ; RV32-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
 ; RV32-NEXT:    vslidedown.vi v24, v10, 16
-; RV32-NEXT:    vmv.x.s a1, v24
+; RV32-NEXT:    vmv.x.s a0, v24
 ; RV32-NEXT:    vslidedown.vi v24, v10, 17
-; RV32-NEXT:    vmv.x.s a2, v24
+; RV32-NEXT:    vmv.x.s a1, v24
 ; RV32-NEXT:    vslidedown.vi v24, v10, 18
-; RV32-NEXT:    vmv.x.s a3, v24
+; RV32-NEXT:    vmv.x.s a2, v24
 ; RV32-NEXT:    vslidedown.vi v24, v10, 19
-; RV32-NEXT:    vmv.x.s a4, v24
+; RV32-NEXT:    vmv.x.s a3, v24
 ; RV32-NEXT:    vslidedown.vi v24, v10, 20
-; RV32-NEXT:    vmv.x.s a5, v24
+; RV32-NEXT:    vmv.x.s a4, v24
 ; RV32-NEXT:    vslidedown.vi v24, v10, 21
-; RV32-NEXT:    vmv.x.s a6, v24
+; RV32-NEXT:    vmv.x.s a5, v24
 ; RV32-NEXT:    vslidedown.vi v24, v10, 22
-; RV32-NEXT:    vmv.x.s a7, v24
+; RV32-NEXT:    vmv.x.s a6, v24
 ; RV32-NEXT:    vslidedown.vi v24, v10, 23
-; RV32-NEXT:    vmv.x.s t0, v24
+; RV32-NEXT:    vmv.x.s a7, v24
 ; RV32-NEXT:    vslidedown.vi v24, v10, 24
-; RV32-NEXT:    vmv.x.s t1, v24
+; RV32-NEXT:    vmv.x.s t0, v24
 ; RV32-NEXT:    vslidedown.vi v24, v10, 25
-; RV32-NEXT:    vmv.x.s t2, v24
+; RV32-NEXT:    vmv.x.s t1, v24
 ; RV32-NEXT:    vslidedown.vi v24, v10, 26
-; RV32-NEXT:    vmv.x.s t3, v24
+; RV32-NEXT:    vmv.x.s t2, v24
 ; RV32-NEXT:    vslidedown.vi v24, v10, 27
-; RV32-NEXT:    vmv.x.s t4, v24
+; RV32-NEXT:    vmv.x.s t3, v24
 ; RV32-NEXT:    vslidedown.vi v24, v10, 28
-; RV32-NEXT:    vmv.x.s t5, v24
+; RV32-NEXT:    vmv.x.s t4, v24
 ; RV32-NEXT:    vslidedown.vi v24, v10, 29
-; RV32-NEXT:    vmv.x.s t6, v24
+; RV32-NEXT:    vmv.x.s t5, v24
 ; RV32-NEXT:    vslidedown.vi v24, v10, 30
-; RV32-NEXT:    vmv.x.s s0, v24
+; RV32-NEXT:    vmv.x.s t6, v24
 ; RV32-NEXT:    vslidedown.vi v24, v10, 31
-; RV32-NEXT:    vmv.x.s s1, v24
+; RV32-NEXT:    vmv.x.s s0, v24
 ; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v11, v10, 13
-; RV32-NEXT:    vslidedown.vi v23, v10, 14
-; RV32-NEXT:    vslidedown.vi v10, v10, 15
-; RV32-NEXT:    vmv.x.s s2, v9
-; RV32-NEXT:    vmv.x.s s3, v12
-; RV32-NEXT:    vmv.x.s s4, v13
-; RV32-NEXT:    vmv.x.s s5, v14
-; RV32-NEXT:    vmv.x.s s6, v15
-; RV32-NEXT:    vmv.x.s s7, v16
-; RV32-NEXT:    vmv.x.s s8, v17
-; RV32-NEXT:    vmv.x.s s9, v18
-; RV32-NEXT:    vmv.x.s s10, v19
-; RV32-NEXT:    vmv.x.s s11, v20
-; RV32-NEXT:    vmv.x.s ra, v21
-; RV32-NEXT:    vmseq.vx v9, v8, a0
-; RV32-NEXT:    vmv.x.s a0, v22
-; RV32-NEXT:    vmseq.vx v12, v8, s2
-; RV32-NEXT:    vmv.x.s s2, v11
-; RV32-NEXT:    vmseq.vx v11, v8, s3
-; RV32-NEXT:    vmv.x.s s3, v23
-; RV32-NEXT:    vmseq.vx v13, v8, s4
-; RV32-NEXT:    vmv.x.s s4, v10
-; RV32-NEXT:    vmseq.vx v10, v8, s5
-; RV32-NEXT:    vmor.mm v9, v9, v12
-; RV32-NEXT:    vmseq.vx v12, v8, s6
-; RV32-NEXT:    vmor.mm v9, v9, v11
-; RV32-NEXT:    vmseq.vx v11, v8, s7
-; RV32-NEXT:    vmor.mm v9, v9, v13
-; RV32-NEXT:    vmseq.vx v13, v8, s8
+; RV32-NEXT:    vrgather.vi v11, v10, 13
+; RV32-NEXT:    vrgather.vi v24, v10, 14
+; RV32-NEXT:    vrgather.vi v25, v10, 15
+; RV32-NEXT:    vmseq.vv v9, v8, v9
+; RV32-NEXT:    vmseq.vv v10, v8, v12
+; RV32-NEXT:    vmor.mm v9, v10, v9
+; RV32-NEXT:    vmseq.vv v10, v8, v13
 ; RV32-NEXT:    vmor.mm v9, v9, v10
-; RV32-NEXT:    vmseq.vx v10, v8, s9
-; RV32-NEXT:    vmor.mm v9, v9, v12
-; RV32-NEXT:    vmseq.vx v12, v8, s10
-; RV32-NEXT:    vmor.mm v9, v9, v11
-; RV32-NEXT:    vmseq.vx v11, v8, s11
-; RV32-NEXT:    vmor.mm v9, v9, v13
-; RV32-NEXT:    vmseq.vx v13, v8, ra
+; RV32-NEXT:    vmseq.vv v10, v8, v14
+; RV32-NEXT:    vmor.mm v9, v9, v10
+; RV32-NEXT:    vmseq.vv v10, v8, v15
+; RV32-NEXT:    vmor.mm v9, v9, v10
+; RV32-NEXT:    vmseq.vv v10, v8, v16
+; RV32-NEXT:    vmor.mm v9, v9, v10
+; RV32-NEXT:    vmseq.vv v10, v8, v17
+; RV32-NEXT:    vmor.mm v9, v9, v10
+; RV32-NEXT:    vmseq.vv v10, v8, v18
+; RV32-NEXT:    vmor.mm v9, v9, v10
+; RV32-NEXT:    vmseq.vv v10, v8, v19
+; RV32-NEXT:    vmor.mm v9, v9, v10
+; RV32-NEXT:    vmseq.vv v10, v8, v20
+; RV32-NEXT:    vmor.mm v9, v9, v10
+; RV32-NEXT:    vmseq.vv v10, v8, v21
+; RV32-NEXT:    vmor.mm v9, v9, v10
+; RV32-NEXT:    vmseq.vv v10, v8, v22
+; RV32-NEXT:    vmor.mm v9, v9, v10
+; RV32-NEXT:    vmseq.vv v10, v8, v23
 ; RV32-NEXT:    vmor.mm v9, v9, v10
 ; RV32-NEXT:    vmseq.vx v10, v8, a0
-; RV32-NEXT:    vmor.mm v9, v9, v12
-; RV32-NEXT:    vmseq.vx v12, v8, s2
+; RV32-NEXT:    vmseq.vv v11, v8, v11
 ; RV32-NEXT:    vmor.mm v9, v9, v11
-; RV32-NEXT:    vmseq.vx v11, v8, s3
-; RV32-NEXT:    vmor.mm v9, v9, v13
-; RV32-NEXT:    vmseq.vx v13, v8, s4
-; RV32-NEXT:    vmor.mm v9, v9, v10
-; RV32-NEXT:    vmseq.vx v10, v8, a1
+; RV32-NEXT:    vmseq.vx v11, v8, a1
+; RV32-NEXT:    vmseq.vv v12, v8, v24
 ; RV32-NEXT:    vmor.mm v9, v9, v12
 ; RV32-NEXT:    vmseq.vx v12, v8, a2
-; RV32-NEXT:    vmor.mm v9, v9, v11
-; RV32-NEXT:    vmseq.vx v11, v8, a3
+; RV32-NEXT:    vmseq.vv v13, v8, v25
 ; RV32-NEXT:    vmor.mm v9, v9, v13
-; RV32-NEXT:    vmseq.vx v13, v8, a4
+; RV32-NEXT:    vmseq.vx v13, v8, a3
 ; RV32-NEXT:    vmor.mm v9, v9, v10
-; RV32-NEXT:    vmseq.vx v10, v8, a5
+; RV32-NEXT:    vmseq.vx v10, v8, a4
+; RV32-NEXT:    vmor.mm v9, v9, v11
+; RV32-NEXT:    vmseq.vx v11, v8, a5
 ; RV32-NEXT:    vmor.mm v9, v9, v12
 ; RV32-NEXT:    vmseq.vx v12, v8, a6
-; RV32-NEXT:    vmor.mm v9, v9, v11
-; RV32-NEXT:    vmseq.vx v11, v8, a7
 ; RV32-NEXT:    vmor.mm v9, v9, v13
-; RV32-NEXT:    vmseq.vx v13, v8, t0
+; RV32-NEXT:    vmseq.vx v13, v8, a7
 ; RV32-NEXT:    vmor.mm v9, v9, v10
-; RV32-NEXT:    vmseq.vx v10, v8, t1
+; RV32-NEXT:    vmseq.vx v10, v8, t0
+; RV32-NEXT:    vmor.mm v9, v9, v11
+; RV32-NEXT:    vmseq.vx v11, v8, t1
 ; RV32-NEXT:    vmor.mm v9, v9, v12
 ; RV32-NEXT:    vmseq.vx v12, v8, t2
-; RV32-NEXT:    vmor.mm v9, v9, v11
-; RV32-NEXT:    vmseq.vx v11, v8, t3
 ; RV32-NEXT:    vmor.mm v9, v9, v13
-; RV32-NEXT:    vmseq.vx v13, v8, t4
+; RV32-NEXT:    vmseq.vx v13, v8, t3
 ; RV32-NEXT:    vmor.mm v9, v9, v10
-; RV32-NEXT:    vmseq.vx v10, v8, t5
+; RV32-NEXT:    vmseq.vx v10, v8, t4
+; RV32-NEXT:    vmor.mm v9, v9, v11
+; RV32-NEXT:    vmseq.vx v11, v8, t5
 ; RV32-NEXT:    vmor.mm v9, v9, v12
 ; RV32-NEXT:    vmseq.vx v12, v8, t6
-; RV32-NEXT:    vmor.mm v9, v9, v11
-; RV32-NEXT:    vmseq.vx v11, v8, s0
 ; RV32-NEXT:    vmor.mm v9, v9, v13
 ; RV32-NEXT:    vmor.mm v9, v9, v10
-; RV32-NEXT:    vmor.mm v9, v9, v12
 ; RV32-NEXT:    vmor.mm v9, v9, v11
-; RV32-NEXT:    vmseq.vx v8, v8, s1
+; RV32-NEXT:    vmor.mm v9, v9, v12
+; RV32-NEXT:    vmseq.vx v8, v8, s0
 ; RV32-NEXT:    vmor.mm v8, v9, v8
 ; RV32-NEXT:    vmand.mm v0, v8, v0
-; RV32-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s1, 52(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s2, 48(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s3, 44(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s4, 40(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s5, 36(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s6, 32(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s7, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s8, 24(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s9, 20(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s10, 16(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s11, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT:    .cfi_restore ra
+; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    .cfi_restore s0
-; RV32-NEXT:    .cfi_restore s1
-; RV32-NEXT:    .cfi_restore s2
-; RV32-NEXT:    .cfi_restore s3
-; RV32-NEXT:    .cfi_restore s4
-; RV32-NEXT:    .cfi_restore s5
-; RV32-NEXT:    .cfi_restore s6
-; RV32-NEXT:    .cfi_restore s7
-; RV32-NEXT:    .cfi_restore s8
-; RV32-NEXT:    .cfi_restore s9
-; RV32-NEXT:    .cfi_restore s10
-; RV32-NEXT:    .cfi_restore s11
-; RV32-NEXT:    addi sp, sp, 64
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: match_v16i8_v32i8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -112
-; RV64-NEXT:    .cfi_def_cfa_offset 112
-; RV64-NEXT:    sd ra, 104(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 96(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s1, 88(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s2, 80(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s3, 72(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s4, 64(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s5, 56(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s6, 48(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s7, 40(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s8, 32(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s9, 24(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s10, 16(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s11, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT:    .cfi_offset ra, -8
-; RV64-NEXT:    .cfi_offset s0, -16
-; RV64-NEXT:    .cfi_offset s1, -24
-; RV64-NEXT:    .cfi_offset s2, -32
-; RV64-NEXT:    .cfi_offset s3, -40
-; RV64-NEXT:    .cfi_offset s4, -48
-; RV64-NEXT:    .cfi_offset s5, -56
-; RV64-NEXT:    .cfi_offset s6, -64
-; RV64-NEXT:    .cfi_offset s7, -72
-; RV64-NEXT:    .cfi_offset s8, -80
-; RV64-NEXT:    .cfi_offset s9, -88
-; RV64-NEXT:    .cfi_offset s10, -96
-; RV64-NEXT:    .cfi_offset s11, -104
-; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT:    vmv.x.s a0, v10
-; RV64-NEXT:    vslidedown.vi v9, v10, 1
-; RV64-NEXT:    vslidedown.vi v12, v10, 2
-; RV64-NEXT:    vslidedown.vi v13, v10, 3
-; RV64-NEXT:    vslidedown.vi v14, v10, 4
-; RV64-NEXT:    vslidedown.vi v15, v10, 5
-; RV64-NEXT:    vslidedown.vi v16, v10, 6
-; RV64-NEXT:    vslidedown.vi v17, v10, 7
-; RV64-NEXT:    vslidedown.vi v18, v10, 8
-; RV64-NEXT:    vslidedown.vi v19, v10, 9
-; RV64-NEXT:    vslidedown.vi v20, v10, 10
-; RV64-NEXT:    vslidedown.vi v21, v10, 11
-; RV64-NEXT:    vslidedown.vi v22, v10, 12
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset s0, -8
+; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV64-NEXT:    vrgather.vi v9, v10, 1
+; RV64-NEXT:    vrgather.vi v12, v10, 0
+; RV64-NEXT:    vrgather.vi v13, v10, 2
+; RV64-NEXT:    vrgather.vi v14, v10, 3
+; RV64-NEXT:    vrgather.vi v15, v10, 4
+; RV64-NEXT:    vrgather.vi v16, v10, 5
+; RV64-NEXT:    vrgather.vi v17, v10, 6
+; RV64-NEXT:    vrgather.vi v18, v10, 7
+; RV64-NEXT:    vrgather.vi v19, v10, 8
+; RV64-NEXT:    vrgather.vi v20, v10, 9
+; RV64-NEXT:    vrgather.vi v21, v10, 10
+; RV64-NEXT:    vrgather.vi v22, v10, 11
+; RV64-NEXT:    vrgather.vi v23, v10, 12
 ; RV64-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
 ; RV64-NEXT:    vslidedown.vi v24, v10, 16
-; RV64-NEXT:    vmv.x.s a1, v24
+; RV64-NEXT:    vmv.x.s a0, v24
 ; RV64-NEXT:    vslidedown.vi v24, v10, 17
-; RV64-NEXT:    vmv.x.s a2, v24
+; RV64-NEXT:    vmv.x.s a1, v24
 ; RV64-NEXT:    vslidedown.vi v24, v10, 18
-; RV64-NEXT:    vmv.x.s a3, v24
+; RV64-NEXT:    vmv.x.s a2, v24
 ; RV64-NEXT:    vslidedown.vi v24, v10, 19
-; RV64-NEXT:    vmv.x.s a4, v24
+; RV64-NEXT:    vmv.x.s a3, v24
 ; RV64-NEXT:    vslidedown.vi v24, v10, 20
-; RV64-NEXT:    vmv.x.s a5, v24
+; RV64-NEXT:    vmv.x.s a4, v24
 ; RV64-NEXT:    vslidedown.vi v24, v10, 21
-; RV64-NEXT:    vmv.x.s a6, v24
+; RV64-NEXT:    vmv.x.s a5, v24
 ; RV64-NEXT:    vslidedown.vi v24, v10, 22
-; RV64-NEXT:    vmv.x.s a7, v24
+; RV64-NEXT:    vmv.x.s a6, v24
 ; RV64-NEXT:    vslidedown.vi v24, v10, 23
-; RV64-NEXT:    vmv.x.s t0, v24
+; RV64-NEXT:    vmv.x.s a7, v24
 ; RV64-NEXT:    vslidedown.vi v24, v10, 24
-; RV64-NEXT:    vmv.x.s t1, v24
+; RV64-NEXT:    vmv.x.s t0, v24
 ; RV64-NEXT:    vslidedown.vi v24, v10, 25
-; RV64-NEXT:    vmv.x.s t2, v24
+; RV64-NEXT:    vmv.x.s t1, v24
 ; RV64-NEXT:    vslidedown.vi v24, v10, 26
-; RV64-NEXT:    vmv.x.s t3, v24
+; RV64-NEXT:    vmv.x.s t2, v24
 ; RV64-NEXT:    vslidedown.vi v24, v10, 27
-; RV64-NEXT:    vmv.x.s t4, v24
+; RV64-NEXT:    vmv.x.s t3, v24
 ; RV64-NEXT:    vslidedown.vi v24, v10, 28
-; RV64-NEXT:    vmv.x.s t5, v24
+; RV64-NEXT:    vmv.x.s t4, v24
 ; RV64-NEXT:    vslidedown.vi v24, v10, 29
-; RV64-NEXT:    vmv.x.s t6, v24
+; RV64-NEXT:    vmv.x.s t5, v24
 ; RV64-NEXT:    vslidedown.vi v24, v10, 30
-; RV64-NEXT:    vmv.x.s s0, v24
+; RV64-NEXT:    vmv.x.s t6, v24
 ; RV64-NEXT:    vslidedown.vi v24, v10, 31
-; RV64-NEXT:    vmv.x.s s1, v24
+; RV64-NEXT:    vmv.x.s s0, v24
 ; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v11, v10, 13
-; RV64-NEXT:    vslidedown.vi v23, v10, 14
-; RV64-NEXT:    vslidedown.vi v10, v10, 15
-; RV64-NEXT:    vmv.x.s s2, v9
-; RV64-NEXT:    vmv.x.s s3, v12
-; RV64-NEXT:    vmv.x.s s4, v13
-; RV64-NEXT:    vmv.x.s s5, v14
-; RV64-NEXT:    vmv.x.s s6, v15
-; RV64-NEXT:    vmv.x.s s7, v16
-; RV64-NEXT:    vmv.x.s s8, v17
-; RV64-NEXT:    vmv.x.s s9, v18
-; RV64-NEXT:    vmv.x.s s10, v19
-; RV64-NEXT:    vmv.x.s s11, v20
-; RV64-NEXT:    vmv.x.s ra, v21
-; RV64-NEXT:    vmseq.vx v9, v8, a0
-; RV64-NEXT:    vmv.x.s a0, v22
-; RV64-NEXT:    vmseq.vx v12, v8, s2
-; RV64-NEXT:    vmv.x.s s2, v11
-; RV64-NEXT:    vmseq.vx v11, v8, s3
-; RV64-NEXT:    vmv.x.s s3, v23
-; RV64-NEXT:    vmseq.vx v13, v8, s4
-; RV64-NEXT:    vmv.x.s s4, v10
-; RV64-NEXT:    vmseq.vx v10, v8, s5
-; RV64-NEXT:    vmor.mm v9, v9, v12
-; RV64-NEXT:    vmseq.vx v12, v8, s6
-; RV64-NEXT:    vmor.mm v9, v9, v11
-; RV64-NEXT:    vmseq.vx v11, v8, s7
-; RV64-NEXT:    vmor.mm v9, v9, v13
-; RV64-NEXT:    vmseq.vx v13, v8, s8
+; RV64-NEXT:    vrgather.vi v11, v10, 13
+; RV64-NEXT:    vrgather.vi v24, v10, 14
+; RV64-NEXT:    vrgather.vi v25, v10, 15
+; RV64-NEXT:    vmseq.vv v9, v8, v9
+; RV64-NEXT:    vmseq.vv v10, v8, v12
+; RV64-NEXT:    vmor.mm v9, v10, v9
+; RV64-NEXT:    vmseq.vv v10, v8, v13
 ; RV64-NEXT:    vmor.mm v9, v9, v10
-; RV64-NEXT:    vmseq.vx v10, v8, s9
-; RV64-NEXT:    vmor.mm v9, v9, v12
-; RV64-NEXT:    vmseq.vx v12, v8, s10
-; RV64-NEXT:    vmor.mm v9, v9, v11
-; RV64-NEXT:    vmseq.vx v11, v8, s11
-; RV64-NEXT:    vmor.mm v9, v9, v13
-; RV64-NEXT:    vmseq.vx v13, v8, ra
+; RV64-NEXT:    vmseq.vv v10, v8, v14
+; RV64-NEXT:    vmor.mm v9, v9, v10
+; RV64-NEXT:    vmseq.vv v10, v8, v15
+; RV64-NEXT:    vmor.mm v9, v9, v10
+; RV64-NEXT:    vmseq.vv v10, v8, v16
+; RV64-NEXT:    vmor.mm v9, v9, v10
+; RV64-NEXT:    vmseq.vv v10, v8, v17
+; RV64-NEXT:    vmor.mm v9, v9, v10
+; RV64-NEXT:    vmseq.vv v10, v8, v18
+; RV64-NEXT:    vmor.mm v9, v9, v10
+; RV64-NEXT:    vmseq.vv v10, v8, v19
+; RV64-NEXT:    vmor.mm v9, v9, v10
+; RV64-NEXT:    vmseq.vv v10, v8, v20
+; RV64-NEXT:    vmor.mm v9, v9, v10
+; RV64-NEXT:    vmseq.vv v10, v8, v21
+; RV64-NEXT:    vmor.mm v9, v9, v10
+; RV64-NEXT:    vmseq.vv v10, v8, v22
+; RV64-NEXT:    vmor.mm v9, v9, v10
+; RV64-NEXT:    vmseq.vv v10, v8, v23
 ; RV64-NEXT:    vmor.mm v9, v9, v10
 ; RV64-NEXT:    vmseq.vx v10, v8, a0
-; RV64-NEXT:    vmor.mm v9, v9, v12
-; RV64-NEXT:    vmseq.vx v12, v8, s2
+; RV64-NEXT:    vmseq.vv v11, v8, v11
 ; RV64-NEXT:    vmor.mm v9, v9, v11
-; RV64-NEXT:    vmseq.vx v11, v8, s3
-; RV64-NEXT:    vmor.mm v9, v9, v13
-; RV64-NEXT:    vmseq.vx v13, v8, s4
-; RV64-NEXT:    vmor.mm v9, v9, v10
-; RV64-NEXT:    vmseq.vx v10, v8, a1
+; RV64-NEXT:    vmseq.vx v11, v8, a1
+; RV64-NEXT:    vmseq.vv v12, v8, v24
 ; RV64-NEXT:    vmor.mm v9, v9, v12
 ; RV64-NEXT:    vmseq.vx v12, v8, a2
-; RV64-NEXT:    vmor.mm v9, v9, v11
-; RV64-NEXT:    vmseq.vx v11, v8, a3
+; RV64-NEXT:    vmseq.vv v13, v8, v25
 ; RV64-NEXT:    vmor.mm v9, v9, v13
-; RV64-NEXT:    vmseq.vx v13, v8, a4
+; RV64-NEXT:    vmseq.vx v13, v8, a3
 ; RV64-NEXT:    vmor.mm v9, v9, v10
-; RV64-NEXT:    vmseq.vx v10, v8, a5
+; RV64-NEXT:    vmseq.vx v10, v8, a4
+; RV64-NEXT:    vmor.mm v9, v9, v11
+; RV64-NEXT:    vmseq.vx v11, v8, a5
 ; RV64-NEXT:    vmor.mm v9, v9, v12
 ; RV64-NEXT:    vmseq.vx v12, v8, a6
-; RV64-NEXT:    vmor.mm v9, v9, v11
-; RV64-NEXT:    vmseq.vx v11, v8, a7
 ; RV64-NEXT:    vmor.mm v9, v9, v13
-; RV64-NEXT:    vmseq.vx v13, v8, t0
+; RV64-NEXT:    vmseq.vx v13, v8, a7
 ; RV64-NEXT:    vmor.mm v9, v9, v10
-; RV64-NEXT:    vmseq.vx v10, v8, t1
+; RV64-NEXT:    vmseq.vx v10, v8, t0
+; RV64-NEXT:    vmor.mm v9, v9, v11
+; RV64-NEXT:    vmseq.vx v11, v8, t1
 ; RV64-NEXT:    vmor.mm v9, v9, v12
 ; RV64-NEXT:    vmseq.vx v12, v8, t2
-; RV64-NEXT:    vmor.mm v9, v9, v11
-; RV64-NEXT:    vmseq.vx v11, v8, t3
 ; RV64-NEXT:    vmor.mm v9, v9, v13
-; RV64-NEXT:    vmseq.vx v13, v8, t4
+; RV64-NEXT:    vmseq.vx v13, v8, t3
 ; RV64-NEXT:    vmor.mm v9, v9, v10
-; RV64-NEXT:    vmseq.vx v10, v8, t5
+; RV64-NEXT:    vmseq.vx v10, v8, t4
+; RV64-NEXT:    vmor.mm v9, v9, v11
+; RV64-NEXT:    vmseq.vx v11, v8, t5
 ; RV64-NEXT:    vmor.mm v9, v9, v12
 ; RV64-NEXT:    vmseq.vx v12, v8, t6
-; RV64-NEXT:    vmor.mm v9, v9, v11
-; RV64-NEXT:    vmseq.vx v11, v8, s0
 ; RV64-NEXT:    vmor.mm v9, v9, v13
 ; RV64-NEXT:    vmor.mm v9, v9, v10
-; RV64-NEXT:    vmor.mm v9, v9, v12
 ; RV64-NEXT:    vmor.mm v9, v9, v11
-; RV64-NEXT:    vmseq.vx v8, v8, s1
+; RV64-NEXT:    vmor.mm v9, v9, v12
+; RV64-NEXT:    vmseq.vx v8, v8, s0
 ; RV64-NEXT:    vmor.mm v8, v9, v8
 ; RV64-NEXT:    vmand.mm v0, v8, v0
-; RV64-NEXT:    ld ra, 104(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s0, 96(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s1, 88(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s2, 80(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s3, 72(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s4, 64(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s5, 56(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s6, 48(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s7, 40(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s8, 32(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s9, 24(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s10, 16(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s11, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT:    .cfi_restore ra
+; RV64-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    .cfi_restore s0
-; RV64-NEXT:    .cfi_restore s1
-; RV64-NEXT:    .cfi_restore s2
-; RV64-NEXT:    .cfi_restore s3
-; RV64-NEXT:    .cfi_restore s4
-; RV64-NEXT:    .cfi_restore s5
-; RV64-NEXT:    .cfi_restore s6
-; RV64-NEXT:    .cfi_restore s7
-; RV64-NEXT:    .cfi_restore s8
-; RV64-NEXT:    .cfi_restore s9
-; RV64-NEXT:    .cfi_restore s10
-; RV64-NEXT:    .cfi_restore s11
-; RV64-NEXT:    addi sp, sp, 112
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    .cfi_def_cfa_offset 0
 ; RV64-NEXT:    ret
   %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <32 x i8> %op2, <16 x i1> %mask)
diff --git a/llvm/test/CodeGen/RISCV/rvv/splat-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/splat-vectors.ll
index a556c3125c85d..47db3da3fbe7a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/splat-vectors.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/splat-vectors.ll
@@ -165,8 +165,8 @@ define <vscale x 4 x i8> @splat_idx_nxv8i8_nxv4i8_constant_0(<vscale x 8 x i8> %
 ; CHECK-LABEL: splat_idx_nxv8i8_nxv4i8_constant_0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vmv.x.s a0, v8
-; CHECK-NEXT:    vmv.v.x v8, a0
+; CHECK-NEXT:    vrgather.vi v9, v8, 0
+; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %x = extractelement <vscale x 8 x i8> %v, i64 0
   %ins = insertelement <vscale x 4 x i8> poison, i8 %x, i32 0
@@ -177,11 +177,9 @@ define <vscale x 4 x i8> @splat_idx_nxv8i8_nxv4i8_constant_0(<vscale x 8 x i8> %
 define <vscale x 4 x i8> @splat_idx_nxv8i8_nxv4i8_constant_3(<vscale x 8 x i8> %v) {
 ; CHECK-LABEL: splat_idx_nxv8i8_nxv4i8_constant_3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v8, v8, 3
-; CHECK-NEXT:    vmv.x.s a0, v8
-; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.x v8, a0
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vrgather.vi v9, v8, 3
+; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %x = extractelement <vscale x 8 x i8> %v, i64 3
   %ins = insertelement <vscale x 4 x i8> poison, i8 %x, i32 0
@@ -210,8 +208,8 @@ define <8 x float> @splat_idx_nxv4f32_v8f32_constant_0(<vscale x 4 x float> %v)
 ; CHECK-LABEL: splat_idx_nxv4f32_v8f32_constant_0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vfmv.f.s fa5, v8
-; CHECK-NEXT:    vfmv.v.f v8, fa5
+; CHECK-NEXT:    vrgather.vi v10, v8, 0
+; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %x = extractelement <vscale x 4 x float> %v, i64 0
   %ins = insertelement <8 x float> poison, float %x, i32 0
@@ -222,11 +220,9 @@ define <8 x float> @splat_idx_nxv4f32_v8f32_constant_0(<vscale x 4 x float> %v)
 define <8 x float> @splat_idx_nxv4f32_v8f32_constant_7(<vscale x 4 x float> %v) {
 ; CHECK-LABEL: splat_idx_nxv4f32_v8f32_constant_7:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v8, v8, 7
-; CHECK-NEXT:    vfmv.f.s fa5, v8
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vfmv.v.f v8, fa5
+; CHECK-NEXT:    vrgather.vi v10, v8, 7
+; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %x = extractelement <vscale x 4 x float> %v, i64 7
   %ins = insertelement <8 x float> poison, float %x, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll
index ea8b166c156cb..cf73dceaae306 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
 ; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64
-; RUN: llc -mtriple=riscv32 -mattr=+v,+zvkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVKB,CHECK-ZVKB32
-; RUN: llc -mtriple=riscv64 -mattr=+v,+zvkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVKB,CHECK-ZVKB64
+; RUN: llc -mtriple=riscv32 -mattr=+v,+zvkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVKB,CHECK-ZVKB-NOZBB,CHECK-ZVKB32
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zvkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVKB,CHECK-ZVKB-NOZBB,CHECK-ZVKB64
+; RUN: llc -mtriple=riscv32 -mattr=+v,+zvkb,+zbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVKB,CHECK-ZVKB-ZBB,CHECK-ZVKB32
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zvkb,+zbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVKB,CHECK-ZVKB-ZBB,CHECK-ZVKB64
 
 define <vscale x 1 x i8> @vandn_vv_nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %y) {
 ; CHECK-LABEL: vandn_vv_nxv1i8:
@@ -1931,3 +1933,179 @@ define <vscale x 8 x i64> @vandn_vx_swapped_nxv8i64(i64 %x, <vscale x 8 x i64> %
   %b = and <vscale x 8 x i64> %splat, %y
   ret <vscale x 8 x i64> %b
 }
+
+define <vscale x 1 x i16> @vandn_vx_imm16(<vscale x 1 x i16> %x) {
+; CHECK-LABEL: vandn_vx_imm16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 8
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVKB-LABEL: vandn_vx_imm16:
+; CHECK-ZVKB:       # %bb.0:
+; CHECK-ZVKB-NEXT:    lui a0, 1048568
+; CHECK-ZVKB-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-ZVKB-NEXT:    vandn.vx v8, v8, a0
+; CHECK-ZVKB-NEXT:    ret
+  %a = and <vscale x 1 x i16> splat (i16 32767), %x
+  ret <vscale x 1 x i16> %a
+}
+
+define <vscale x 1 x i16> @vandn_vx_swapped_imm16(<vscale x 1 x i16> %x) {
+; CHECK-LABEL: vandn_vx_swapped_imm16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 8
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVKB-LABEL: vandn_vx_swapped_imm16:
+; CHECK-ZVKB:       # %bb.0:
+; CHECK-ZVKB-NEXT:    lui a0, 1048568
+; CHECK-ZVKB-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-ZVKB-NEXT:    vandn.vx v8, v8, a0
+; CHECK-ZVKB-NEXT:    ret
+  %a = and <vscale x 1 x i16> %x, splat (i16 32767)
+  ret <vscale x 1 x i16> %a
+}
+
+define <vscale x 1 x i64> @vandn_vx_imm64(<vscale x 1 x i64> %x) {
+; CHECK-RV32-LABEL: vandn_vx_imm64:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    addi sp, sp, -16
+; CHECK-RV32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-RV32-NEXT:    lui a0, 1044480
+; CHECK-RV32-NEXT:    li a1, 255
+; CHECK-RV32-NEXT:    sw a1, 8(sp)
+; CHECK-RV32-NEXT:    sw a0, 12(sp)
+; CHECK-RV32-NEXT:    addi a0, sp, 8
+; CHECK-RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-RV32-NEXT:    vlse64.v v9, (a0), zero
+; CHECK-RV32-NEXT:    vand.vv v8, v8, v9
+; CHECK-RV32-NEXT:    addi sp, sp, 16
+; CHECK-RV32-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: vandn_vx_imm64:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    li a0, -1
+; CHECK-RV64-NEXT:    slli a0, a0, 56
+; CHECK-RV64-NEXT:    addi a0, a0, 255
+; CHECK-RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-RV64-NEXT:    vand.vx v8, v8, a0
+; CHECK-RV64-NEXT:    ret
+;
+; CHECK-ZVKB32-LABEL: vandn_vx_imm64:
+; CHECK-ZVKB32:       # %bb.0:
+; CHECK-ZVKB32-NEXT:    addi sp, sp, -16
+; CHECK-ZVKB32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-ZVKB32-NEXT:    lui a0, 1044480
+; CHECK-ZVKB32-NEXT:    li a1, 255
+; CHECK-ZVKB32-NEXT:    sw a1, 8(sp)
+; CHECK-ZVKB32-NEXT:    sw a0, 12(sp)
+; CHECK-ZVKB32-NEXT:    addi a0, sp, 8
+; CHECK-ZVKB32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-ZVKB32-NEXT:    vlse64.v v9, (a0), zero
+; CHECK-ZVKB32-NEXT:    vand.vv v8, v8, v9
+; CHECK-ZVKB32-NEXT:    addi sp, sp, 16
+; CHECK-ZVKB32-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-ZVKB32-NEXT:    ret
+;
+; CHECK-ZVKB64-LABEL: vandn_vx_imm64:
+; CHECK-ZVKB64:       # %bb.0:
+; CHECK-ZVKB64-NEXT:    lui a0, 1048560
+; CHECK-ZVKB64-NEXT:    srli a0, a0, 8
+; CHECK-ZVKB64-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-ZVKB64-NEXT:    vandn.vx v8, v8, a0
+; CHECK-ZVKB64-NEXT:    ret
+  %a = and <vscale x 1 x i64> %x, splat (i64 -72057594037927681)
+  ret <vscale x 1 x i64> %a
+}
+
+define <vscale x 1 x i16> @vandn_vx_multi_imm16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %y) {
+; CHECK-LABEL: vandn_vx_multi_imm16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 4
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVKB-LABEL: vandn_vx_multi_imm16:
+; CHECK-ZVKB:       # %bb.0:
+; CHECK-ZVKB-NEXT:    lui a0, 1048572
+; CHECK-ZVKB-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-ZVKB-NEXT:    vandn.vx v8, v8, a0
+; CHECK-ZVKB-NEXT:    vandn.vx v9, v9, a0
+; CHECK-ZVKB-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVKB-NEXT:    ret
+  %a = and <vscale x 1 x i16> %x, splat (i16 16383)
+  %b = and <vscale x 1 x i16> %y, splat (i16 16383)
+  %c = add <vscale x 1 x i16> %a, %b
+  ret <vscale x 1 x i16> %c
+}
+
+define <vscale x 1 x i16> @vandn_vx_multi_scalar_imm16(<vscale x 1 x i16> %x, i16 %y) {
+; CHECK-LABEL: vandn_vx_multi_scalar_imm16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, 8
+; CHECK-NEXT:    addi a1, a1, -1
+; CHECK-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    or a0, a0, a1
+; CHECK-NEXT:    vadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVKB-NOZBB-LABEL: vandn_vx_multi_scalar_imm16:
+; CHECK-ZVKB-NOZBB:       # %bb.0:
+; CHECK-ZVKB-NOZBB-NEXT:    lui a1, 8
+; CHECK-ZVKB-NOZBB-NEXT:    addi a1, a1, -1
+; CHECK-ZVKB-NOZBB-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-ZVKB-NOZBB-NEXT:    vand.vx v8, v8, a1
+; CHECK-ZVKB-NOZBB-NEXT:    or a0, a0, a1
+; CHECK-ZVKB-NOZBB-NEXT:    vadd.vx v8, v8, a0
+; CHECK-ZVKB-NOZBB-NEXT:    ret
+;
+; CHECK-ZVKB-ZBB-LABEL: vandn_vx_multi_scalar_imm16:
+; CHECK-ZVKB-ZBB:       # %bb.0:
+; CHECK-ZVKB-ZBB-NEXT:    lui a1, 1048568
+; CHECK-ZVKB-ZBB-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-ZVKB-ZBB-NEXT:    vandn.vx v8, v8, a1
+; CHECK-ZVKB-ZBB-NEXT:    orn a0, a0, a1
+; CHECK-ZVKB-ZBB-NEXT:    vadd.vx v8, v8, a0
+; CHECK-ZVKB-ZBB-NEXT:    ret
+  %a = and <vscale x 1 x i16> %x, splat (i16 32767)
+  %b = or i16 %y, 32767
+  %head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
+  %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %c = add <vscale x 1 x i16> %a, %splat
+  ret <vscale x 1 x i16> %c
+}
+
+define <vscale x 1 x i16> @vand_vadd_vx_imm16(<vscale x 1 x i16> %x) {
+; CHECK-LABEL: vand_vadd_vx_imm16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 8
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVKB-LABEL: vand_vadd_vx_imm16:
+; CHECK-ZVKB:       # %bb.0:
+; CHECK-ZVKB-NEXT:    lui a0, 8
+; CHECK-ZVKB-NEXT:    addi a0, a0, -1
+; CHECK-ZVKB-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-ZVKB-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVKB-NEXT:    vadd.vx v8, v8, a0
+; CHECK-ZVKB-NEXT:    ret
+  %a = and <vscale x 1 x i16> %x, splat (i16 32767)
+  %b = add <vscale x 1 x i16> %a, splat (i16 32767)
+  ret <vscale x 1 x i16> %b
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vandn-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vandn-vp.ll
index 763b2908b1026..5d29b266546f5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vandn-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vandn-vp.ll
@@ -1429,3 +1429,117 @@ define <vscale x 8 x i64> @vandn_vx_vp_nxv8i64(i64 %a, <vscale x 8 x i64> %b, <v
   %x = call <vscale x 8 x i64> @llvm.vp.and.nxv8i64(<vscale x 8 x i64> %b, <vscale x 8 x i64> %splat.not.a, <vscale x 8 x i1> %mask, i32 %evl)
   ret <vscale x 8 x i64> %x
 }
+
+define <vscale x 1 x i16> @vandn_vx_vp_imm16(<vscale x 1 x i16> %x, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: vandn_vx_vp_imm16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, 8
+; CHECK-NEXT:    addi a1, a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vand.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVKB-LABEL: vandn_vx_vp_imm16:
+; CHECK-ZVKB:       # %bb.0:
+; CHECK-ZVKB-NEXT:    lui a1, 1048568
+; CHECK-ZVKB-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVKB-NEXT:    vandn.vx v8, v8, a1, v0.t
+; CHECK-ZVKB-NEXT:    ret
+  %masked = call <vscale x 1 x i16> @llvm.vp.and.nxv1i16(<vscale x 1 x i16> splat (i16 32767), <vscale x 1 x i16> %x, <vscale x 1 x i1> %mask, i32 %evl) ; constant is the first operand; the ZVKB expectations load its complement with a single lui and emit vandn.vx
+  ret <vscale x 1 x i16> %masked
+}
+
+define <vscale x 1 x i16> @vandn_vx_vp_swapped_imm16(<vscale x 1 x i16> %x, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: vandn_vx_vp_swapped_imm16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, 8
+; CHECK-NEXT:    addi a1, a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vand.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVKB-LABEL: vandn_vx_vp_swapped_imm16:
+; CHECK-ZVKB:       # %bb.0:
+; CHECK-ZVKB-NEXT:    lui a1, 1048568
+; CHECK-ZVKB-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVKB-NEXT:    vandn.vx v8, v8, a1, v0.t
+; CHECK-ZVKB-NEXT:    ret
+  %masked = call <vscale x 1 x i16> @llvm.vp.and.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> splat (i16 32767), <vscale x 1 x i1> %mask, i32 %evl) ; constant is the second operand here; the expected code is the same as for the unswapped variant
+  ret <vscale x 1 x i16> %masked
+}
+
+define <vscale x 1 x i64> @vandn_vx_vp_imm64(<vscale x 1 x i64> %x, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
+; CHECK-RV32-LABEL: vandn_vx_vp_imm64:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    addi sp, sp, -16
+; CHECK-RV32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-RV32-NEXT:    lui a1, 1044480
+; CHECK-RV32-NEXT:    li a2, 255
+; CHECK-RV32-NEXT:    sw a2, 8(sp)
+; CHECK-RV32-NEXT:    sw a1, 12(sp)
+; CHECK-RV32-NEXT:    addi a1, sp, 8
+; CHECK-RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32-NEXT:    vlse64.v v9, (a1), zero
+; CHECK-RV32-NEXT:    vand.vv v8, v8, v9, v0.t
+; CHECK-RV32-NEXT:    addi sp, sp, 16
+; CHECK-RV32-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: vandn_vx_vp_imm64:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    li a1, -1
+; CHECK-RV64-NEXT:    slli a1, a1, 56
+; CHECK-RV64-NEXT:    addi a1, a1, 255
+; CHECK-RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; CHECK-RV64-NEXT:    ret
+;
+; CHECK-ZVKB32-LABEL: vandn_vx_vp_imm64:
+; CHECK-ZVKB32:       # %bb.0:
+; CHECK-ZVKB32-NEXT:    addi sp, sp, -16
+; CHECK-ZVKB32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-ZVKB32-NEXT:    lui a1, 1044480
+; CHECK-ZVKB32-NEXT:    li a2, 255
+; CHECK-ZVKB32-NEXT:    sw a2, 8(sp)
+; CHECK-ZVKB32-NEXT:    sw a1, 12(sp)
+; CHECK-ZVKB32-NEXT:    addi a1, sp, 8
+; CHECK-ZVKB32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-ZVKB32-NEXT:    vlse64.v v9, (a1), zero
+; CHECK-ZVKB32-NEXT:    vand.vv v8, v8, v9, v0.t
+; CHECK-ZVKB32-NEXT:    addi sp, sp, 16
+; CHECK-ZVKB32-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-ZVKB32-NEXT:    ret
+;
+; CHECK-ZVKB64-LABEL: vandn_vx_vp_imm64:
+; CHECK-ZVKB64:       # %bb.0:
+; CHECK-ZVKB64-NEXT:    lui a1, 1048560
+; CHECK-ZVKB64-NEXT:    srli a1, a1, 8
+; CHECK-ZVKB64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-ZVKB64-NEXT:    vandn.vx v8, v8, a1, v0.t
+; CHECK-ZVKB64-NEXT:    ret
+  %masked = call <vscale x 1 x i64> @llvm.vp.and.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> splat (i64 -72057594037927681), <vscale x 1 x i1> %mask, i32 %evl) ; constant is 0xff000000000000ff; the ZVKB64 expectations build its complement (lui + srli) for vandn.vx, while both RV32 configurations splat it through the stack and use vand.vv
+  ret <vscale x 1 x i64> %masked
+}
+
+define <vscale x 1 x i16> @vand_vadd_vx_vp_imm16(<vscale x 1 x i16> %x, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: vand_vadd_vx_vp_imm16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, 8
+; CHECK-NEXT:    addi a1, a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vand.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    vadd.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVKB-LABEL: vand_vadd_vx_vp_imm16:
+; CHECK-ZVKB:       # %bb.0:
+; CHECK-ZVKB-NEXT:    lui a1, 8
+; CHECK-ZVKB-NEXT:    addi a1, a1, -1
+; CHECK-ZVKB-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVKB-NEXT:    vand.vx v8, v8, a1, v0.t
+; CHECK-ZVKB-NEXT:    vadd.vx v8, v8, a1, v0.t
+; CHECK-ZVKB-NEXT:    ret
+  %masked = call <vscale x 1 x i16> @llvm.vp.and.nxv1i16(<vscale x 1 x i16> splat (i16 32767), <vscale x 1 x i16> %x, <vscale x 1 x i1> %mask, i32 %evl) ; mask each lane with 0x7fff
+  %sum = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> splat (i16 32767), <vscale x 1 x i16> %masked, <vscale x 1 x i1> %mask, i32 %evl) ; same constant reused as addend; both expected outputs share a1 and keep vand.vx
+  ret <vscale x 1 x i16> %sum
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll
index 809ae2d2bebfe..16e5e7b9199a3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll
@@ -29,6 +29,18 @@ entry:
   ret <vscale x 1 x i8> %1
 }
 
+define <vscale x 1 x i8> @test_vlseg2_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i64 %vl) {
+; CHECK-LABEL: test_vlseg2_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_2t:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vlseg2e8.v v7, (a0)
+; CHECK-NEXT:    ret
+entry: ; masked vlseg2 intrinsic given a constant all-ones mask and a poison passthru; the expected load has no v0.t operand
+  %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv1i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) poison, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
+  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0, i32 1) ; return field 1 of the loaded tuple
+  ret <vscale x 1 x i8> %1
+}
+
 declare target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2), ptr, i64, i64)
 declare target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2), ptr, <vscale x 2 x i1>, i64, i64, i64)
 
@@ -191,6 +203,18 @@ entry:
   ret <vscale x 1 x i8> %1
 }
 
+define <vscale x 1 x i8> @test_vlseg3_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vlseg3_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_3t:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vlseg3e8.v v7, (a0)
+; CHECK-NEXT:    ret
+entry: ; masked vlseg3 intrinsic given a constant all-ones mask (%mask is unused) and a poison passthru; the expected load has no v0.t operand
+  %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv1i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) poison, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
+  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0, i32 1) ; return field 1 of the loaded tuple
+  ret <vscale x 1 x i8> %1
+}
+
 declare target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3), ptr, i64, i64)
 declare target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3), ptr, <vscale x 2 x i1>, i64, i64, i64)
 
@@ -326,6 +350,18 @@ entry:
   ret <vscale x 1 x i8> %1
 }
 
+define <vscale x 1 x i8> @test_vlseg4_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vlseg4_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_4t:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vlseg4e8.v v7, (a0)
+; CHECK-NEXT:    ret
+entry: ; masked vlseg4 intrinsic given a constant all-ones mask (%mask is unused) and a poison passthru; the expected load has no v0.t operand
+  %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv1i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) poison, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
+  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0, i32 1) ; return field 1 of the loaded tuple
+  ret <vscale x 1 x i8> %1
+}
+
 declare target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4), ptr, i64, i64)
 declare target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4), ptr, <vscale x 2 x i1>, i64, i64, i64)
 
@@ -461,6 +497,18 @@ entry:
   ret <vscale x 1 x i8> %1
 }
 
+define <vscale x 1 x i8> @test_vlseg5_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vlseg5_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_5t:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vlseg5e8.v v7, (a0)
+; CHECK-NEXT:    ret
+entry: ; masked vlseg5 intrinsic given a constant all-ones mask (%mask is unused) and a poison passthru; the expected load has no v0.t operand
+  %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv1i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) poison, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
+  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0, i32 1) ; return field 1 of the loaded tuple
+  ret <vscale x 1 x i8> %1
+}
+
 declare target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5), ptr, i64, i64)
 declare target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5), ptr, <vscale x 2 x i1>, i64, i64, i64)
 
@@ -569,6 +617,18 @@ entry:
   ret <vscale x 1 x i8> %1
 }
 
+define <vscale x 1 x i8> @test_vlseg6_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vlseg6_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_6t:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vlseg6e8.v v7, (a0)
+; CHECK-NEXT:    ret
+entry: ; masked vlseg6 intrinsic given a constant all-ones mask (%mask is unused) and a poison passthru; the expected load has no v0.t operand
+  %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv1i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) poison, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
+  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0, i32 1) ; return field 1 of the loaded tuple
+  ret <vscale x 1 x i8> %1
+}
+
 declare target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6), ptr, i64, i64)
 declare target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6), ptr, <vscale x 2 x i1>, i64, i64, i64)
 
@@ -677,6 +737,18 @@ entry:
   ret <vscale x 1 x i8> %1
 }
 
+define <vscale x 1 x i8> @test_vlseg7_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vlseg7_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_7t:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vlseg7e8.v v7, (a0)
+; CHECK-NEXT:    ret
+entry: ; masked vlseg7 intrinsic given a constant all-ones mask (%mask is unused) and a poison passthru; the expected load has no v0.t operand
+  %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv1i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) poison, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
+  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0, i32 1) ; return field 1 of the loaded tuple
+  ret <vscale x 1 x i8> %1
+}
+
 declare target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7), ptr, i64, i64)
 declare target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7), ptr, <vscale x 2 x i1>, i64, i64, i64)
 
@@ -785,6 +857,18 @@ entry:
   ret <vscale x 1 x i8> %1
 }
 
+define <vscale x 1 x i8> @test_vlseg8_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vlseg8_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_8t:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vlseg8e8.v v7, (a0)
+; CHECK-NEXT:    ret
+entry: ; masked vlseg8 intrinsic given a constant all-ones mask (%mask is unused) and a poison passthru; the expected load has no v0.t operand
+  %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv1i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) poison, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
+  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0, i32 1) ; return field 1 of the loaded tuple
+  ret <vscale x 1 x i8> %1
+}
+
 declare target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8), ptr, i64, i64)
 declare target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8), ptr, <vscale x 2 x i1>, i64, i64, i64)
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv64.ll
index 3b9db2655e033..0f7348b474ee4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv64.ll
@@ -31,6 +31,19 @@ entry:
   ret <vscale x 1 x i8> %1
 }
 
+define <vscale x 1 x i8> @test_vluxseg2_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_2t_nxv1i8(ptr %base, <vscale x 1 x i8> %index, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vluxseg2_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_2t_nxv1i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vluxseg2ei8.v v9, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+entry: ; masked vluxseg2 intrinsic given a constant all-ones mask (%mask is unused) and a poison passthru; the expected load has no v0.t operand
+  %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv1i8_2t.nxv1i1.nxv1i8(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) poison, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
+  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0, i32 1) ; return field 1; the expected code copies it from v10 into v8
+  ret <vscale x 1 x i8> %1
+}
+
 declare target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv1i8_2t.nxv1i16(target("riscv.vector.tuple", <vscale x 1 x i8>, 2), ptr, <vscale x 1 x i16>, i64, i64)
 declare target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv1i8_2t.nxv1i16.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 2), ptr, <vscale x 1 x i16>, <vscale x 1 x i1>, i64, i64, i64)
 
@@ -640,6 +653,19 @@ entry:
   ret <vscale x 1 x i8> %1
 }
 
+define <vscale x 1 x i8> @test_vluxseg3_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_3t_nxv1i8(ptr %base, <vscale x 1 x i8> %index, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vluxseg3_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_3t_nxv1i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vluxseg3ei8.v v9, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+entry: ; masked vluxseg3 intrinsic given a constant all-ones mask (%mask is unused) and a poison passthru; the expected load has no v0.t operand
+  %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv1i8_3t.nxv1i1.nxv1i8(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) poison, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
+  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0, i32 1) ; return field 1; the expected code copies it from v10 into v8
+  ret <vscale x 1 x i8> %1
+}
+
 declare target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv1i8_3t.nxv1i16(target("riscv.vector.tuple", <vscale x 1 x i8>, 3), ptr, <vscale x 1 x i16>, i64, i64)
 declare target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv1i8_3t.nxv1i16.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 3), ptr, <vscale x 1 x i16>, <vscale x 1 x i1>, i64, i64, i64)
 
@@ -1191,6 +1217,19 @@ entry:
   ret <vscale x 1 x i8> %1
 }
 
+define <vscale x 1 x i8> @test_vluxseg4_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_4t_nxv1i8(ptr %base, <vscale x 1 x i8> %index, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vluxseg4_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_4t_nxv1i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vluxseg4ei8.v v9, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+entry: ; masked vluxseg4 intrinsic given a constant all-ones mask (%mask is unused) and a poison passthru; the expected load has no v0.t operand
+  %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv1i8_4t.nxv1i1.nxv1i8(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) poison, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
+  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0, i32 1) ; return field 1; the expected code copies it from v10 into v8
+  ret <vscale x 1 x i8> %1
+}
+
 declare target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv1i8_4t.nxv1i16(target("riscv.vector.tuple", <vscale x 1 x i8>, 4), ptr, <vscale x 1 x i16>, i64, i64)
 declare target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv1i8_4t.nxv1i16.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 4), ptr, <vscale x 1 x i16>, <vscale x 1 x i1>, i64, i64, i64)
 
@@ -1742,6 +1781,19 @@ entry:
   ret <vscale x 1 x i8> %1
 }
 
+define <vscale x 1 x i8> @test_vluxseg5_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_5t_nxv1i8(ptr %base, <vscale x 1 x i8> %index, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vluxseg5_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_5t_nxv1i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vluxseg5ei8.v v9, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+entry: ; masked vluxseg5 intrinsic given a constant all-ones mask (%mask is unused) and a poison passthru; the expected load has no v0.t operand
+  %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv1i8_5t.nxv1i1.nxv1i8(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) poison, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
+  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0, i32 1) ; return field 1; the expected code copies it from v10 into v8
+  ret <vscale x 1 x i8> %1
+}
+
 declare target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv1i8_5t.nxv1i16(target("riscv.vector.tuple", <vscale x 1 x i8>, 5), ptr, <vscale x 1 x i16>, i64, i64)
 declare target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv1i8_5t.nxv1i16.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 5), ptr, <vscale x 1 x i16>, <vscale x 1 x i1>, i64, i64, i64)
 
@@ -2206,6 +2258,19 @@ entry:
   ret <vscale x 1 x i8> %1
 }
 
+define <vscale x 1 x i8> @test_vluxseg6_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_6t_nxv1i8(ptr %base, <vscale x 1 x i8> %index, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vluxseg6_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_6t_nxv1i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vluxseg6ei8.v v9, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+entry: ; masked vluxseg6 intrinsic given a constant all-ones mask (%mask is unused) and a poison passthru; the expected load has no v0.t operand
+  %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv1i8_6t.nxv1i1.nxv1i8(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) poison, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
+  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0, i32 1) ; return field 1; the expected code copies it from v10 into v8
+  ret <vscale x 1 x i8> %1
+}
+
 declare target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv1i8_6t.nxv1i16(target("riscv.vector.tuple", <vscale x 1 x i8>, 6), ptr, <vscale x 1 x i16>, i64, i64)
 declare target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv1i8_6t.nxv1i16.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 6), ptr, <vscale x 1 x i16>, <vscale x 1 x i1>, i64, i64, i64)
 
@@ -2670,6 +2735,19 @@ entry:
   ret <vscale x 1 x i8> %1
 }
 
+define <vscale x 1 x i8> @test_vluxseg7_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_7t_nxv1i8(ptr %base, <vscale x 1 x i8> %index, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vluxseg7_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_7t_nxv1i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vluxseg7ei8.v v9, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+entry: ; masked vluxseg7 intrinsic given a constant all-ones mask (%mask is unused) and a poison passthru; the expected load has no v0.t operand
+  %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv1i8_7t.nxv1i1.nxv1i8(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) poison, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
+  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0, i32 1) ; return field 1; the expected code copies it from v10 into v8
+  ret <vscale x 1 x i8> %1
+}
+
 declare target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv1i8_7t.nxv1i16(target("riscv.vector.tuple", <vscale x 1 x i8>, 7), ptr, <vscale x 1 x i16>, i64, i64)
 declare target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv1i8_7t.nxv1i16.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 7), ptr, <vscale x 1 x i16>, <vscale x 1 x i1>, i64, i64, i64)
 
@@ -3134,6 +3212,19 @@ entry:
   ret <vscale x 1 x i8> %1
 }
 
+define <vscale x 1 x i8> @test_vluxseg8_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_8t_nxv1i8(ptr %base, <vscale x 1 x i8> %index, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vluxseg8_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_8t_nxv1i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vluxseg8ei8.v v9, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+entry: ; masked vluxseg8 intrinsic given a constant all-ones mask (%mask is unused) and a poison passthru; the expected load has no v0.t operand
+  %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv1i8_8t.nxv1i1.nxv1i8(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) poison, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
+  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0, i32 1) ; return field 1; the expected code copies it from v10 into v8
+  ret <vscale x 1 x i8> %1
+}
+
 declare target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv1i8_8t.nxv1i16(target("riscv.vector.tuple", <vscale x 1 x i8>, 8), ptr, <vscale x 1 x i16>, i64, i64)
 declare target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv1i8_8t.nxv1i16.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 8), ptr, <vscale x 1 x i16>, <vscale x 1 x i1>, i64, i64, i64)
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vse.ll b/llvm/test/CodeGen/RISCV/rvv/vse.ll
index f2ae2136078c0..607ce2394ee81 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vse.ll
@@ -46,6 +46,22 @@ entry:
   ret void
 }
 
+define void @intrinsic_vse_allonesmask_v_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_allonesmask_v_nxv1i64_nxv1i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
+entry: ; masked vse intrinsic called with a constant all-ones mask
+  call void @llvm.riscv.vse.mask.nxv1i64(
+    <vscale x 1 x i64> %0,
+    ptr %1,
+    <vscale x 1 x i1> splat (i1 true), ; constant mask; the %2 mask argument is unused
+    iXLen %3)
+
+  ret void ; the expected store above has no v0.t operand
+}
+
 declare void @llvm.riscv.vse.nxv2i64(
   <vscale x 2 x i64>,
   ptr,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsse.ll b/llvm/test/CodeGen/RISCV/rvv/vsse.ll
index 6908a2000653b..770e06749c348 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsse.ll
@@ -50,6 +50,23 @@ entry:
   ret void
 }
 
+define void @intrinsic_vsse_allonesmask_v_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_allonesmask_v_nxv1i64_nxv1i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-NEXT:    vsse64.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry: ; masked vsse (strided store) intrinsic called with a constant all-ones mask
+  call void @llvm.riscv.vsse.mask.nxv1i64(
+    <vscale x 1 x i64> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 1 x i1> splat (i1 true), ; constant mask; the %3 mask argument is unused
+    iXLen %4)
+
+  ret void ; the expected store above has no v0.t operand
+}
+
 declare void @llvm.riscv.vsse.nxv2i64(
   <vscale x 2 x i64>,
   ptr,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsseg-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsseg-rv64.ll
index 6ce326be23ee3..880066bf45990 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsseg-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsseg-rv64.ll
@@ -27,6 +27,17 @@ entry:
   ret void
 }
 
+define void @test_vsseg2_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %val, ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vsseg2_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_2t:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsseg2e8.v v8, (a0)
+; CHECK-NEXT:    ret
+entry: ; masked vsseg2 intrinsic given a constant all-ones mask (%mask is unused); the expected store has no v0.t operand
+  tail call void @llvm.riscv.vsseg2.mask.triscv.vector.tuple_nxv1i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %val, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 3)
+  ret void
+}
+
 declare void @llvm.riscv.vsseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2), ptr, i64, i64)
 declare void @llvm.riscv.vsseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2), ptr, <vscale x 2 x i1>, i64, i64)
 
@@ -277,6 +288,17 @@ entry:
   ret void
 }
 
+define void @test_vsseg3_allonesmask_nxv16i8_triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %val, ptr %base, i64 %vl, <vscale x 16 x i1> %mask) {
+; CHECK-LABEL: test_vsseg3_allonesmask_nxv16i8_triscv.vector.tuple_nxv16i8_3t:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vsseg3e8.v v8, (a0)
+; CHECK-NEXT:    ret
+entry: ; masked vsseg3 intrinsic (nxv16i8 tuple) given a constant all-ones mask (%mask is unused); the expected store has no v0.t operand
+  tail call void @llvm.riscv.vsseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %val, ptr %base, <vscale x 16 x i1> splat (i1 true), i64 %vl, i64 3)
+  ret void
+}
+
 declare void @llvm.riscv.vsseg4.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4), ptr, i64, i64)
 declare void @llvm.riscv.vsseg4.mask.triscv.vector.tuple_nxv1i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 4), ptr, <vscale x 1 x i1>, i64, i64)
 
@@ -302,6 +324,17 @@ entry:
   ret void
 }
 
+define void @test_vsseg4_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %val, ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vsseg4_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_4t:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsseg4e8.v v8, (a0)
+; CHECK-NEXT:    ret
+entry: ; masked vsseg4 intrinsic given a constant all-ones mask (%mask is unused); the expected store has no v0.t operand
+  tail call void @llvm.riscv.vsseg4.mask.triscv.vector.tuple_nxv1i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %val, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 3)
+  ret void
+}
+
 declare void @llvm.riscv.vsseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4), ptr, i64, i64)
 declare void @llvm.riscv.vsseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4), ptr, <vscale x 2 x i1>, i64, i64)
 
@@ -427,6 +460,17 @@ entry:
   ret void
 }
 
+define void @test_vsseg5_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %val, ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vsseg5_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_5t:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsseg5e8.v v8, (a0)
+; CHECK-NEXT:    ret
+entry: ; masked vsseg5 intrinsic given a constant all-ones mask (%mask is unused); the expected store has no v0.t operand
+  tail call void @llvm.riscv.vsseg5.mask.triscv.vector.tuple_nxv1i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %val, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 3)
+  ret void
+}
+
 declare void @llvm.riscv.vsseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5), ptr, i64, i64)
 declare void @llvm.riscv.vsseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5), ptr, <vscale x 2 x i1>, i64, i64)
 
@@ -527,6 +571,17 @@ entry:
   ret void
 }
 
+define void @test_vsseg6_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %val, ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vsseg6_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_6t:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsseg6e8.v v8, (a0)
+; CHECK-NEXT:    ret
+entry: ; masked vsseg6 intrinsic given a constant all-ones mask (%mask is unused); the expected store has no v0.t operand
+  tail call void @llvm.riscv.vsseg6.mask.triscv.vector.tuple_nxv1i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %val, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 3)
+  ret void
+}
+
 declare void @llvm.riscv.vsseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6), ptr, i64, i64)
 declare void @llvm.riscv.vsseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6), ptr, <vscale x 2 x i1>, i64, i64)
 
@@ -627,6 +682,17 @@ entry:
   ret void
 }
 
+define void @test_vsseg7_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %val, ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vsseg7_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_7t:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsseg7e8.v v8, (a0)
+; CHECK-NEXT:    ret
+entry: ; masked vsseg7 intrinsic given a constant all-ones mask (%mask is unused); the expected store has no v0.t operand
+  tail call void @llvm.riscv.vsseg7.mask.triscv.vector.tuple_nxv1i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %val, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 3)
+  ret void
+}
+
 declare void @llvm.riscv.vsseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7), ptr, i64, i64)
 declare void @llvm.riscv.vsseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7), ptr, <vscale x 2 x i1>, i64, i64)
 
@@ -727,6 +793,17 @@ entry:
   ret void
 }
 
+define void @test_vsseg8_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %val, ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vsseg8_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_8t:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsseg8e8.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  tail call void @llvm.riscv.vsseg8.mask.triscv.vector.tuple_nxv1i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %val, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 3)
+  ret void
+}
+
 declare void @llvm.riscv.vsseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8), ptr, i64, i64)
 declare void @llvm.riscv.vsseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8), ptr, <vscale x 2 x i1>, i64, i64)
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsuxei-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsuxei-rv64.ll
index 3572f5909400a..6ba2b405c943e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsuxei-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsuxei-rv64.ll
@@ -50,6 +50,23 @@ entry:
   ret void
 }
 
+define void @intrinsic_vsuxei_allonesmask_v_nxv1i8_nxv1i8_nxv1i64(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+; CHECK-LABEL: intrinsic_vsuxei_allonesmask_v_nxv1i8_nxv1i8_nxv1i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsuxei64.v v8, (a0), v9
+; CHECK-NEXT:    ret
+entry:
+  call void @llvm.riscv.vsuxei.mask.nxv1i8.nxv1i64(
+    <vscale x 1 x i8> %0,
+    ptr %1,
+    <vscale x 1 x i64> %2,
+    <vscale x 1 x i1> splat (i1 true),
+    i64 %4)
+
+  ret void
+}
+
 declare void @llvm.riscv.vsuxei.nxv2i8.nxv2i64(
   <vscale x 2 x i8>,
   ptr,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsuxseg-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsuxseg-rv64.ll
index 22be2ebca8fde..b297d33611242 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsuxseg-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsuxseg-rv64.ll
@@ -27,6 +27,17 @@ entry:
   ret void
 }
 
+define void @test_vsuxseg2_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_2t_nxv1i8(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %val, ptr %base, <vscale x 1 x i8> %index, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vsuxseg2_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_2t_nxv1i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsuxseg2ei8.v v8, (a0), v10
+; CHECK-NEXT:    ret
+entry:
+  tail call void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv1i8_2t.nxv1i8.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %val, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 3)
+  ret void
+}
+
 declare void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv1i8_2t.nxv1i16(target("riscv.vector.tuple", <vscale x 1 x i8>, 2), ptr, <vscale x 1 x i16>, i64, i64)
 declare void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv1i8_2t.nxv1i16.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 2), ptr, <vscale x 1 x i16>, <vscale x 1 x i1>, i64, i64)
 
@@ -552,6 +563,17 @@ entry:
   ret void
 }
 
+define void @test_vsuxseg3_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_3t_nxv1i8(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %val, ptr %base, <vscale x 1 x i8> %index, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vsuxseg3_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_3t_nxv1i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsuxseg3ei8.v v8, (a0), v11
+; CHECK-NEXT:    ret
+entry:
+  tail call void @llvm.riscv.vsuxseg3.mask.triscv.vector.tuple_nxv1i8_3t.nxv1i8.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %val, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 3)
+  ret void
+}
+
 declare void @llvm.riscv.vsuxseg3.triscv.vector.tuple_nxv1i8_3t.nxv1i16(target("riscv.vector.tuple", <vscale x 1 x i8>, 3), ptr, <vscale x 1 x i16>, i64, i64)
 declare void @llvm.riscv.vsuxseg3.mask.triscv.vector.tuple_nxv1i8_3t.nxv1i16.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 3), ptr, <vscale x 1 x i16>, <vscale x 1 x i1>, i64, i64)
 
@@ -1027,6 +1049,17 @@ entry:
   ret void
 }
 
+define void @test_vsuxseg4_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_4t_nxv1i8(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %val, ptr %base, <vscale x 1 x i8> %index, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vsuxseg4_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_4t_nxv1i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsuxseg4ei8.v v8, (a0), v12
+; CHECK-NEXT:    ret
+entry:
+  tail call void @llvm.riscv.vsuxseg4.mask.triscv.vector.tuple_nxv1i8_4t.nxv1i8.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %val, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 3)
+  ret void
+}
+
 declare void @llvm.riscv.vsuxseg4.triscv.vector.tuple_nxv1i8_4t.nxv1i16(target("riscv.vector.tuple", <vscale x 1 x i8>, 4), ptr, <vscale x 1 x i16>, i64, i64)
 declare void @llvm.riscv.vsuxseg4.mask.triscv.vector.tuple_nxv1i8_4t.nxv1i16.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 4), ptr, <vscale x 1 x i16>, <vscale x 1 x i1>, i64, i64)
 
@@ -1502,6 +1535,17 @@ entry:
   ret void
 }
 
+define void @test_vsuxseg5_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_5t_nxv1i8(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %val, ptr %base, <vscale x 1 x i8> %index, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vsuxseg5_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_5t_nxv1i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsuxseg5ei8.v v8, (a0), v13
+; CHECK-NEXT:    ret
+entry:
+  tail call void @llvm.riscv.vsuxseg5.mask.triscv.vector.tuple_nxv1i8_5t.nxv1i8.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %val, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 3)
+  ret void
+}
+
 declare void @llvm.riscv.vsuxseg5.triscv.vector.tuple_nxv1i8_5t.nxv1i16(target("riscv.vector.tuple", <vscale x 1 x i8>, 5), ptr, <vscale x 1 x i16>, i64, i64)
 declare void @llvm.riscv.vsuxseg5.mask.triscv.vector.tuple_nxv1i8_5t.nxv1i16.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 5), ptr, <vscale x 1 x i16>, <vscale x 1 x i1>, i64, i64)
 
@@ -1902,6 +1946,17 @@ entry:
   ret void
 }
 
+define void @test_vsuxseg6_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_6t_nxv1i8(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %val, ptr %base, <vscale x 1 x i8> %index, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vsuxseg6_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_6t_nxv1i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsuxseg6ei8.v v8, (a0), v14
+; CHECK-NEXT:    ret
+entry:
+  tail call void @llvm.riscv.vsuxseg6.mask.triscv.vector.tuple_nxv1i8_6t.nxv1i8.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %val, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 3)
+  ret void
+}
+
 declare void @llvm.riscv.vsuxseg6.triscv.vector.tuple_nxv1i8_6t.nxv1i16(target("riscv.vector.tuple", <vscale x 1 x i8>, 6), ptr, <vscale x 1 x i16>, i64, i64)
 declare void @llvm.riscv.vsuxseg6.mask.triscv.vector.tuple_nxv1i8_6t.nxv1i16.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 6), ptr, <vscale x 1 x i16>, <vscale x 1 x i1>, i64, i64)
 
@@ -2302,6 +2357,17 @@ entry:
   ret void
 }
 
+define void @test_vsuxseg7_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_7t_nxv1i8(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %val, ptr %base, <vscale x 1 x i8> %index, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vsuxseg7_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_7t_nxv1i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsuxseg7ei8.v v8, (a0), v15
+; CHECK-NEXT:    ret
+entry:
+  tail call void @llvm.riscv.vsuxseg7.mask.triscv.vector.tuple_nxv1i8_7t.nxv1i8.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %val, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 3)
+  ret void
+}
+
 declare void @llvm.riscv.vsuxseg7.triscv.vector.tuple_nxv1i8_7t.nxv1i16(target("riscv.vector.tuple", <vscale x 1 x i8>, 7), ptr, <vscale x 1 x i16>, i64, i64)
 declare void @llvm.riscv.vsuxseg7.mask.triscv.vector.tuple_nxv1i8_7t.nxv1i16.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 7), ptr, <vscale x 1 x i16>, <vscale x 1 x i1>, i64, i64)
 
@@ -2702,6 +2768,17 @@ entry:
   ret void
 }
 
+define void @test_vsuxseg8_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_8t_nxv1i8(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %val, ptr %base, <vscale x 1 x i8> %index, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vsuxseg8_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_8t_nxv1i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsuxseg8ei8.v v8, (a0), v16
+; CHECK-NEXT:    ret
+entry:
+  tail call void @llvm.riscv.vsuxseg8.mask.triscv.vector.tuple_nxv1i8_8t.nxv1i8.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %val, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 3)
+  ret void
+}
+
 declare void @llvm.riscv.vsuxseg8.triscv.vector.tuple_nxv1i8_8t.nxv1i16(target("riscv.vector.tuple", <vscale x 1 x i8>, 8), ptr, <vscale x 1 x i16>, i64, i64)
 declare void @llvm.riscv.vsuxseg8.mask.triscv.vector.tuple_nxv1i8_8t.nxv1i16.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 8), ptr, <vscale x 1 x i16>, <vscale x 1 x i1>, i64, i64)
 
diff --git a/llvm/test/CodeGen/SPIRV/debug-info/debug-type-pointer.ll b/llvm/test/CodeGen/SPIRV/debug-info/debug-type-pointer.ll
index b7e6e95f366cf..77faa56230876 100644
--- a/llvm/test/CodeGen/SPIRV/debug-info/debug-type-pointer.ll
+++ b/llvm/test/CodeGen/SPIRV/debug-info/debug-type-pointer.ll
@@ -28,19 +28,19 @@
 ; CHECK-MIR-DAG:   [[float:%[0-9]+\:id\(s32\)]] = OpExtInst [[void_type]], 3, 2, {{%[0-9]+\:[a-z0-9\(\)]+}}, [[i32_32]], [[enc_float]], [[i32_0]]
 ; CHECK-MIR-DAG:   [[double:%[0-9]+\:id\(s32\)]] = OpExtInst [[void_type]], 3, 2, {{%[0-9]+\:[a-z0-9\(\)]+}}, [[i32_64]], [[enc_float]], [[i32_0]]
 ; CHECK-MIR-DAG:   [[int:%[0-9]+\:id\(s32\)]] = OpExtInst [[void_type]], 3, 2, {{%[0-9]+\:[a-z0-9\(\)]+}}, [[i32_32]], [[enc_signed]], [[i32_0]]
-; CHECK-MIR:   OpExtInst [[void_type]], 3, 3, [[bool]], [[i32_8]], [[i32_0]]
-; CHECK-MIR:   OpExtInst [[void_type]], 3, 3, [[short]], [[i32_8]], [[i32_0]]
-; CHECK-MIR:   OpExtInst [[void_type]], 3, 3, [[char]], [[i32_8]], [[i32_0]]
-; CHECK-MIR:   OpExtInst [[void_type]], 3, 3, [[long]], [[i32_8]], [[i32_0]]
-; CHECK-MIR:   OpExtInst [[void_type]], 3, 3, [[unsigned_int]], [[i32_8]], [[i32_0]]
-; CHECK-MIR:   OpExtInst [[void_type]], 3, 3, [[unsigned_short]], [[i32_8]], [[i32_0]]
-; CHECK-MIR:   OpExtInst [[void_type]], 3, 3, [[unsigned_char]], [[i32_8]], [[i32_0]]
-; CHECK-MIR:   OpExtInst [[void_type]], 3, 3, [[unsigned_long]], [[i32_8]], [[i32_0]]
-; CHECK-MIR:   OpExtInst [[void_type]], 3, 3, [[float]], [[i32_8]], [[i32_0]]
-; CHECK-MIR:   OpExtInst [[void_type]], 3, 3, [[double]], [[i32_8]], [[i32_0]]
-; CHECK-MIR:   OpExtInst [[void_type]], 3, 3, [[int]], [[i32_5]], [[i32_0]]
-; CHECK-MIR:   [[debug_info_none:%[0-9]+\:id\(s32\)]] = OpExtInst [[void_type]], 3, 0
-; CHECK-MIR:   OpExtInst [[void_type]], 3, 3, [[debug_info_none]], [[i32_5]], [[i32_0]]
+; CHECK-MIR-DAG:   OpExtInst [[void_type]], 3, 3, [[bool]], [[i32_8]], [[i32_0]]
+; CHECK-MIR-DAG:   OpExtInst [[void_type]], 3, 3, [[short]], [[i32_8]], [[i32_0]]
+; CHECK-MIR-DAG:   OpExtInst [[void_type]], 3, 3, [[char]], [[i32_8]], [[i32_0]]
+; CHECK-MIR-DAG:   OpExtInst [[void_type]], 3, 3, [[long]], [[i32_8]], [[i32_0]]
+; CHECK-MIR-DAG:   OpExtInst [[void_type]], 3, 3, [[unsigned_int]], [[i32_8]], [[i32_0]]
+; CHECK-MIR-DAG:   OpExtInst [[void_type]], 3, 3, [[unsigned_short]], [[i32_8]], [[i32_0]]
+; CHECK-MIR-DAG:   OpExtInst [[void_type]], 3, 3, [[unsigned_char]], [[i32_8]], [[i32_0]]
+; CHECK-MIR-DAG:   OpExtInst [[void_type]], 3, 3, [[unsigned_long]], [[i32_8]], [[i32_0]]
+; CHECK-MIR-DAG:   OpExtInst [[void_type]], 3, 3, [[float]], [[i32_8]], [[i32_0]]
+; CHECK-MIR-DAG:   OpExtInst [[void_type]], 3, 3, [[double]], [[i32_8]], [[i32_0]]
+; CHECK-MIR-DAG:   OpExtInst [[void_type]], 3, 3, [[int]], [[i32_5]], [[i32_0]]
+; CHECK-MIR-DAG:   [[debug_info_none:%[0-9]+\:id\(s32\)]] = OpExtInst [[void_type]], 3, 0
+; CHECK-MIR-DAG:   OpExtInst [[void_type]], 3, 3, [[debug_info_none]], [[i32_5]], [[i32_0]]
 
 ; CHECK-SPIRV:	[[i32type:%[0-9]+]] = OpTypeInt 32 0
 ; CHECK-SPIRV-DAG:	[[i32_8:%[0-9]+]] = OpConstant [[i32type]] 8
diff --git a/llvm/test/CodeGen/WebAssembly/simd.ll b/llvm/test/CodeGen/WebAssembly/simd.ll
index 5ec9f6a2a321b..7228d5335a33f 100644
--- a/llvm/test/CodeGen/WebAssembly/simd.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd.ll
@@ -481,21 +481,6 @@ define <16 x i8> @shuffle_undef_v16i8(<16 x i8> %x, <16 x i8> %y) {
 ; NO-SIMD128-LABEL: shuffle_undef_v16i8:
 ; NO-SIMD128:         .functype shuffle_undef_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT:  # %bb.0:
-; NO-SIMD128-NEXT:    i32.store8 15($0), $2
-; NO-SIMD128-NEXT:    i32.store8 14($0), $2
-; NO-SIMD128-NEXT:    i32.store8 13($0), $2
-; NO-SIMD128-NEXT:    i32.store8 12($0), $2
-; NO-SIMD128-NEXT:    i32.store8 11($0), $2
-; NO-SIMD128-NEXT:    i32.store8 10($0), $2
-; NO-SIMD128-NEXT:    i32.store8 9($0), $2
-; NO-SIMD128-NEXT:    i32.store8 8($0), $2
-; NO-SIMD128-NEXT:    i32.store8 7($0), $2
-; NO-SIMD128-NEXT:    i32.store8 6($0), $2
-; NO-SIMD128-NEXT:    i32.store8 5($0), $2
-; NO-SIMD128-NEXT:    i32.store8 4($0), $2
-; NO-SIMD128-NEXT:    i32.store8 3($0), $2
-; NO-SIMD128-NEXT:    i32.store8 2($0), $2
-; NO-SIMD128-NEXT:    i32.store8 1($0), $2
 ; NO-SIMD128-NEXT:    i32.store8 0($0), $2
 ; NO-SIMD128-NEXT:    return
   %res = shufflevector <16 x i8> %x, <16 x i8> %y,
@@ -994,13 +979,6 @@ define <8 x i16> @shuffle_undef_v8i16(<8 x i16> %x, <8 x i16> %y) {
 ; NO-SIMD128-LABEL: shuffle_undef_v8i16:
 ; NO-SIMD128:         .functype shuffle_undef_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT:  # %bb.0:
-; NO-SIMD128-NEXT:    i32.store16 14($0), $2
-; NO-SIMD128-NEXT:    i32.store16 12($0), $2
-; NO-SIMD128-NEXT:    i32.store16 10($0), $2
-; NO-SIMD128-NEXT:    i32.store16 8($0), $2
-; NO-SIMD128-NEXT:    i32.store16 6($0), $2
-; NO-SIMD128-NEXT:    i32.store16 4($0), $2
-; NO-SIMD128-NEXT:    i32.store16 2($0), $2
 ; NO-SIMD128-NEXT:    i32.store16 0($0), $2
 ; NO-SIMD128-NEXT:    return
   %res = shufflevector <8 x i16> %x, <8 x i16> %y,
@@ -1288,9 +1266,6 @@ define <4 x i32> @shuffle_undef_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; NO-SIMD128-LABEL: shuffle_undef_v4i32:
 ; NO-SIMD128:         .functype shuffle_undef_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT:  # %bb.0:
-; NO-SIMD128-NEXT:    i32.store 12($0), $2
-; NO-SIMD128-NEXT:    i32.store 8($0), $2
-; NO-SIMD128-NEXT:    i32.store 4($0), $2
 ; NO-SIMD128-NEXT:    i32.store 0($0), $2
 ; NO-SIMD128-NEXT:    return
   %res = shufflevector <4 x i32> %x, <4 x i32> %y,
@@ -1550,7 +1525,6 @@ define <2 x i64> @shuffle_undef_v2i64(<2 x i64> %x, <2 x i64> %y) {
 ; NO-SIMD128-LABEL: shuffle_undef_v2i64:
 ; NO-SIMD128:         .functype shuffle_undef_v2i64 (i32, i64, i64, i64, i64) -> ()
 ; NO-SIMD128-NEXT:  # %bb.0:
-; NO-SIMD128-NEXT:    i64.store 8($0), $2
 ; NO-SIMD128-NEXT:    i64.store 0($0), $2
 ; NO-SIMD128-NEXT:    return
   %res = shufflevector <2 x i64> %x, <2 x i64> %y,
@@ -1819,9 +1793,6 @@ define <4 x float> @shuffle_undef_v4f32(<4 x float> %x, <4 x float> %y) {
 ; NO-SIMD128-LABEL: shuffle_undef_v4f32:
 ; NO-SIMD128:         .functype shuffle_undef_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
 ; NO-SIMD128-NEXT:  # %bb.0:
-; NO-SIMD128-NEXT:    f32.store 12($0), $2
-; NO-SIMD128-NEXT:    f32.store 8($0), $2
-; NO-SIMD128-NEXT:    f32.store 4($0), $2
 ; NO-SIMD128-NEXT:    f32.store 0($0), $2
 ; NO-SIMD128-NEXT:    return
   %res = shufflevector <4 x float> %x, <4 x float> %y,
@@ -2082,7 +2053,6 @@ define <2 x double> @shuffle_undef_v2f64(<2 x double> %x, <2 x double> %y) {
 ; NO-SIMD128-LABEL: shuffle_undef_v2f64:
 ; NO-SIMD128:         .functype shuffle_undef_v2f64 (i32, f64, f64, f64, f64) -> ()
 ; NO-SIMD128-NEXT:  # %bb.0:
-; NO-SIMD128-NEXT:    f64.store 8($0), $2
 ; NO-SIMD128-NEXT:    f64.store 0($0), $2
 ; NO-SIMD128-NEXT:    return
   %res = shufflevector <2 x double> %x, <2 x double> %y,
diff --git a/llvm/test/CodeGen/X86/avx10_2_512minmax-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2_512minmax-intrinsics.ll
index 260451f0f6822..b7713128f4721 100644
--- a/llvm/test/CodeGen/X86/avx10_2_512minmax-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2_512minmax-intrinsics.ll
@@ -2,57 +2,57 @@
 ; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=X64
 ; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=X86
 
-define <32 x bfloat> @test_int_x86_avx10_vminmaxnepbf16512(<32 x bfloat> %A, <32 x bfloat> %B) nounwind {
-; X64-LABEL: test_int_x86_avx10_vminmaxnepbf16512:
+define <32 x bfloat> @test_int_x86_avx10_vminmaxbf16512(<32 x bfloat> %A, <32 x bfloat> %B) nounwind {
+; X64-LABEL: test_int_x86_avx10_vminmaxbf16512:
 ; X64:       # %bb.0:
-; X64-NEXT:    vminmaxnepbf16 $127, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x52,0xc1,0x7f]
+; X64-NEXT:    vminmaxbf16 $127, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x52,0xc1,0x7f]
 ; X64-NEXT:    retq # encoding: [0xc3]
 ;
-; X86-LABEL: test_int_x86_avx10_vminmaxnepbf16512:
+; X86-LABEL: test_int_x86_avx10_vminmaxbf16512:
 ; X86:       # %bb.0:
-; X86-NEXT:    vminmaxnepbf16 $127, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x52,0xc1,0x7f]
+; X86-NEXT:    vminmaxbf16 $127, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x52,0xc1,0x7f]
 ; X86-NEXT:    retl # encoding: [0xc3]
-  %ret = call <32 x bfloat> @llvm.x86.avx10.vminmaxnepbf16512(<32 x bfloat> %A, <32 x bfloat> %B, i32 127)
+  %ret = call <32 x bfloat> @llvm.x86.avx10.vminmaxbf16512(<32 x bfloat> %A, <32 x bfloat> %B, i32 127)
   ret <32 x bfloat> %ret
 }
 
-define <32 x bfloat> @test_int_x86_avx10_mask_vminmaxnepbf16512(<32 x bfloat> %A, <32 x bfloat> %B, <32 x bfloat> %C, i32 %D) nounwind {
-; X64-LABEL: test_int_x86_avx10_mask_vminmaxnepbf16512:
+define <32 x bfloat> @test_int_x86_avx10_mask_vminmaxbf16512(<32 x bfloat> %A, <32 x bfloat> %B, <32 x bfloat> %C, i32 %D) nounwind {
+; X64-LABEL: test_int_x86_avx10_mask_vminmaxbf16512:
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT:    vminmaxnepbf16 $127, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x52,0xd1,0x7f]
+; X64-NEXT:    vminmaxbf16 $127, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x52,0xd1,0x7f]
 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
 ; X64-NEXT:    retq # encoding: [0xc3]
 ;
-; X86-LABEL: test_int_x86_avx10_mask_vminmaxnepbf16512:
+; X86-LABEL: test_int_x86_avx10_mask_vminmaxbf16512:
 ; X86:       # %bb.0:
 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT:    vminmaxnepbf16 $127, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x52,0xd1,0x7f]
+; X86-NEXT:    vminmaxbf16 $127, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x52,0xd1,0x7f]
 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
 ; X86-NEXT:    retl # encoding: [0xc3]
 entry:
-  %0 = call <32 x bfloat> @llvm.x86.avx10.vminmaxnepbf16512(<32 x bfloat> %A, <32 x bfloat> %B, i32 127)
+  %0 = call <32 x bfloat> @llvm.x86.avx10.vminmaxbf16512(<32 x bfloat> %A, <32 x bfloat> %B, i32 127)
   %1 = bitcast i32 %D to <32 x i1>
   %2 = select reassoc nsz arcp contract afn <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> %C
   ret <32 x bfloat> %2
 }
 
-declare <32 x bfloat> @llvm.x86.avx10.vminmaxnepbf16512(<32 x bfloat> %A, <32 x bfloat> %B, i32 %C)
+declare <32 x bfloat> @llvm.x86.avx10.vminmaxbf16512(<32 x bfloat> %A, <32 x bfloat> %B, i32 %C)
 
-define <32 x bfloat> @test_int_x86_avx10_maskz_vminmaxnepbf16512(<32 x bfloat> %A, <32 x bfloat> %B, i32 %C) nounwind {
-; X64-LABEL: test_int_x86_avx10_maskz_vminmaxnepbf16512:
+define <32 x bfloat> @test_int_x86_avx10_maskz_vminmaxbf16512(<32 x bfloat> %A, <32 x bfloat> %B, i32 %C) nounwind {
+; X64-LABEL: test_int_x86_avx10_maskz_vminmaxbf16512:
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT:    vminmaxnepbf16 $127, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7f,0xc9,0x52,0xc1,0x7f]
+; X64-NEXT:    vminmaxbf16 $127, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7f,0xc9,0x52,0xc1,0x7f]
 ; X64-NEXT:    retq # encoding: [0xc3]
 ;
-; X86-LABEL: test_int_x86_avx10_maskz_vminmaxnepbf16512:
+; X86-LABEL: test_int_x86_avx10_maskz_vminmaxbf16512:
 ; X86:       # %bb.0:
 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT:    vminmaxnepbf16 $127, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7f,0xc9,0x52,0xc1,0x7f]
+; X86-NEXT:    vminmaxbf16 $127, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7f,0xc9,0x52,0xc1,0x7f]
 ; X86-NEXT:    retl # encoding: [0xc3]
 entry:
-  %0 = call <32 x bfloat> @llvm.x86.avx10.vminmaxnepbf16512(<32 x bfloat> %A, <32 x bfloat> %B, i32 127)
+  %0 = call <32 x bfloat> @llvm.x86.avx10.vminmaxbf16512(<32 x bfloat> %A, <32 x bfloat> %B, i32 127)
   %1 = bitcast i32 %C to <32 x i1>
   %2 = select reassoc nsz arcp contract afn <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> zeroinitializer
   ret <32 x bfloat> %2
diff --git a/llvm/test/CodeGen/X86/avx10_2minmax-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2minmax-intrinsics.ll
index fd6a01a4a3b69..5dc6ec12b3f60 100644
--- a/llvm/test/CodeGen/X86/avx10_2minmax-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2minmax-intrinsics.ll
@@ -2,114 +2,114 @@
 ; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=X64
 ; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=X86
 
-define <8 x bfloat> @test_int_x86_avx10_vminmaxnepbf16128(<8 x bfloat> %A, <8 x bfloat> %B) nounwind {
-; X64-LABEL: test_int_x86_avx10_vminmaxnepbf16128:
+define <8 x bfloat> @test_int_x86_avx10_vminmaxbf16128(<8 x bfloat> %A, <8 x bfloat> %B) nounwind {
+; X64-LABEL: test_int_x86_avx10_vminmaxbf16128:
 ; X64:       # %bb.0:
-; X64-NEXT:    vminmaxnepbf16 $127, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7f,0x08,0x52,0xc1,0x7f]
+; X64-NEXT:    vminmaxbf16 $127, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7f,0x08,0x52,0xc1,0x7f]
 ; X64-NEXT:    retq # encoding: [0xc3]
 ;
-; X86-LABEL: test_int_x86_avx10_vminmaxnepbf16128:
+; X86-LABEL: test_int_x86_avx10_vminmaxbf16128:
 ; X86:       # %bb.0:
-; X86-NEXT:    vminmaxnepbf16 $127, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7f,0x08,0x52,0xc1,0x7f]
+; X86-NEXT:    vminmaxbf16 $127, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7f,0x08,0x52,0xc1,0x7f]
 ; X86-NEXT:    retl # encoding: [0xc3]
-  %ret = call <8 x bfloat> @llvm.x86.avx10.vminmaxnepbf16128(<8 x bfloat> %A, <8 x bfloat> %B, i32 127)
+  %ret = call <8 x bfloat> @llvm.x86.avx10.vminmaxbf16128(<8 x bfloat> %A, <8 x bfloat> %B, i32 127)
   ret <8 x bfloat> %ret
 }
 
-define <8 x bfloat> @test_int_x86_avx10_mask_vminmaxnepbf16128(<8 x bfloat> %A, <8 x bfloat> %B, <8 x bfloat> %C, i8 %D) nounwind {
-; X64-LABEL: test_int_x86_avx10_mask_vminmaxnepbf16128:
+define <8 x bfloat> @test_int_x86_avx10_mask_vminmaxbf16128(<8 x bfloat> %A, <8 x bfloat> %B, <8 x bfloat> %C, i8 %D) nounwind {
+; X64-LABEL: test_int_x86_avx10_mask_vminmaxbf16128:
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT:    vminmaxnepbf16 $127, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x52,0xd1,0x7f]
+; X64-NEXT:    vminmaxbf16 $127, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x52,0xd1,0x7f]
 ; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
 ; X64-NEXT:    retq # encoding: [0xc3]
 ;
-; X86-LABEL: test_int_x86_avx10_mask_vminmaxnepbf16128:
+; X86-LABEL: test_int_x86_avx10_mask_vminmaxbf16128:
 ; X86:       # %bb.0:
 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT:    vminmaxnepbf16 $127, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x52,0xd1,0x7f]
+; X86-NEXT:    vminmaxbf16 $127, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x52,0xd1,0x7f]
 ; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
 ; X86-NEXT:    retl # encoding: [0xc3]
 entry:
-  %0 = call <8 x bfloat> @llvm.x86.avx10.vminmaxnepbf16128(<8 x bfloat> %A, <8 x bfloat> %B, i32 127)
+  %0 = call <8 x bfloat> @llvm.x86.avx10.vminmaxbf16128(<8 x bfloat> %A, <8 x bfloat> %B, i32 127)
   %1 = bitcast i8 %D to <8 x i1>
   %2 = select reassoc nsz arcp contract afn <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %C
   ret <8 x bfloat> %2
 }
 
-declare <8 x bfloat> @llvm.x86.avx10.vminmaxnepbf16128(<8 x bfloat> %A, <8 x bfloat> %B, i32 %C)
+declare <8 x bfloat> @llvm.x86.avx10.vminmaxbf16128(<8 x bfloat> %A, <8 x bfloat> %B, i32 %C)
 
-define <8 x bfloat> @test_int_x86_avx10_maskz_vminmaxnepbf16128(<8 x bfloat> %A, <8 x bfloat> %B, i8 %C) nounwind {
-; X64-LABEL: test_int_x86_avx10_maskz_vminmaxnepbf16128:
+define <8 x bfloat> @test_int_x86_avx10_maskz_vminmaxbf16128(<8 x bfloat> %A, <8 x bfloat> %B, i8 %C) nounwind {
+; X64-LABEL: test_int_x86_avx10_maskz_vminmaxbf16128:
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT:    vminmaxnepbf16 $127, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7f,0x89,0x52,0xc1,0x7f]
+; X64-NEXT:    vminmaxbf16 $127, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7f,0x89,0x52,0xc1,0x7f]
 ; X64-NEXT:    retq # encoding: [0xc3]
 ;
-; X86-LABEL: test_int_x86_avx10_maskz_vminmaxnepbf16128:
+; X86-LABEL: test_int_x86_avx10_maskz_vminmaxbf16128:
 ; X86:       # %bb.0:
 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT:    vminmaxnepbf16 $127, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7f,0x89,0x52,0xc1,0x7f]
+; X86-NEXT:    vminmaxbf16 $127, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7f,0x89,0x52,0xc1,0x7f]
 ; X86-NEXT:    retl # encoding: [0xc3]
 entry:
-  %0 = call <8 x bfloat> @llvm.x86.avx10.vminmaxnepbf16128(<8 x bfloat> %A, <8 x bfloat> %B, i32 127)
+  %0 = call <8 x bfloat> @llvm.x86.avx10.vminmaxbf16128(<8 x bfloat> %A, <8 x bfloat> %B, i32 127)
   %1 = bitcast i8 %C to <8 x i1>
   %2 = select reassoc nsz arcp contract afn <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> zeroinitializer
   ret <8 x bfloat> %2
 }
 
-define <16 x bfloat> @test_int_x86_avx10_vminmaxnepbf16256(<16 x bfloat> %A, <16 x bfloat> %B) nounwind {
-; X64-LABEL: test_int_x86_avx10_vminmaxnepbf16256:
+define <16 x bfloat> @test_int_x86_avx10_vminmaxbf16256(<16 x bfloat> %A, <16 x bfloat> %B) nounwind {
+; X64-LABEL: test_int_x86_avx10_vminmaxbf16256:
 ; X64:       # %bb.0:
-; X64-NEXT:    vminmaxnepbf16 $127, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7f,0x28,0x52,0xc1,0x7f]
+; X64-NEXT:    vminmaxbf16 $127, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7f,0x28,0x52,0xc1,0x7f]
 ; X64-NEXT:    retq # encoding: [0xc3]
 ;
-; X86-LABEL: test_int_x86_avx10_vminmaxnepbf16256:
+; X86-LABEL: test_int_x86_avx10_vminmaxbf16256:
 ; X86:       # %bb.0:
-; X86-NEXT:    vminmaxnepbf16 $127, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7f,0x28,0x52,0xc1,0x7f]
+; X86-NEXT:    vminmaxbf16 $127, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7f,0x28,0x52,0xc1,0x7f]
 ; X86-NEXT:    retl # encoding: [0xc3]
 entry:
-  %ret = call <16 x bfloat> @llvm.x86.avx10.vminmaxnepbf16256(<16 x bfloat> %A, <16 x bfloat> %B, i32 127)
+  %ret = call <16 x bfloat> @llvm.x86.avx10.vminmaxbf16256(<16 x bfloat> %A, <16 x bfloat> %B, i32 127)
   ret <16 x bfloat> %ret
 }
 
-define <16 x bfloat> @test_int_x86_avx10_mask_vminmaxnepbf16256(<16 x bfloat> %A, <16 x bfloat> %B, <16 x bfloat> %C, i16 %D) nounwind {
-; X64-LABEL: test_int_x86_avx10_mask_vminmaxnepbf16256:
+define <16 x bfloat> @test_int_x86_avx10_mask_vminmaxbf16256(<16 x bfloat> %A, <16 x bfloat> %B, <16 x bfloat> %C, i16 %D) nounwind {
+; X64-LABEL: test_int_x86_avx10_mask_vminmaxbf16256:
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT:    vminmaxnepbf16 $127, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x52,0xd1,0x7f]
+; X64-NEXT:    vminmaxbf16 $127, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x52,0xd1,0x7f]
 ; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
 ; X64-NEXT:    retq # encoding: [0xc3]
 ;
-; X86-LABEL: test_int_x86_avx10_mask_vminmaxnepbf16256:
+; X86-LABEL: test_int_x86_avx10_mask_vminmaxbf16256:
 ; X86:       # %bb.0:
 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
-; X86-NEXT:    vminmaxnepbf16 $127, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x52,0xd1,0x7f]
+; X86-NEXT:    vminmaxbf16 $127, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x52,0xd1,0x7f]
 ; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
 ; X86-NEXT:    retl # encoding: [0xc3]
 entry:
-  %0 = call <16 x bfloat> @llvm.x86.avx10.vminmaxnepbf16256(<16 x bfloat> %A, <16 x bfloat> %B, i32 127)
+  %0 = call <16 x bfloat> @llvm.x86.avx10.vminmaxbf16256(<16 x bfloat> %A, <16 x bfloat> %B, i32 127)
   %1 = bitcast i16 %D to <16 x i1>
   %2 = select reassoc nsz arcp contract afn <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %C
   ret <16 x bfloat> %2
 }
 
-declare <16 x bfloat> @llvm.x86.avx10.vminmaxnepbf16256(<16 x bfloat> %A, <16 x bfloat> %B, i32 %C)
+declare <16 x bfloat> @llvm.x86.avx10.vminmaxbf16256(<16 x bfloat> %A, <16 x bfloat> %B, i32 %C)
 
-define <16 x bfloat> @test_int_x86_avx10_maskz_vminmaxnepbf16256(<16 x bfloat> %A, <16 x bfloat> %B, i16 %C) nounwind {
-; X64-LABEL: test_int_x86_avx10_maskz_vminmaxnepbf16256:
+define <16 x bfloat> @test_int_x86_avx10_maskz_vminmaxbf16256(<16 x bfloat> %A, <16 x bfloat> %B, i16 %C) nounwind {
+; X64-LABEL: test_int_x86_avx10_maskz_vminmaxbf16256:
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT:    vminmaxnepbf16 $127, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7f,0xa9,0x52,0xc1,0x7f]
+; X64-NEXT:    vminmaxbf16 $127, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7f,0xa9,0x52,0xc1,0x7f]
 ; X64-NEXT:    retq # encoding: [0xc3]
 ;
-; X86-LABEL: test_int_x86_avx10_maskz_vminmaxnepbf16256:
+; X86-LABEL: test_int_x86_avx10_maskz_vminmaxbf16256:
 ; X86:       # %bb.0:
 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
-; X86-NEXT:    vminmaxnepbf16 $127, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7f,0xa9,0x52,0xc1,0x7f]
+; X86-NEXT:    vminmaxbf16 $127, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7f,0xa9,0x52,0xc1,0x7f]
 ; X86-NEXT:    retl # encoding: [0xc3]
 entry:
-  %0 = call <16 x bfloat> @llvm.x86.avx10.vminmaxnepbf16256(<16 x bfloat> %A, <16 x bfloat> %B, i32 127)
+  %0 = call <16 x bfloat> @llvm.x86.avx10.vminmaxbf16256(<16 x bfloat> %A, <16 x bfloat> %B, i32 127)
   %1 = bitcast i16 %C to <16 x i1>
   %2 = select reassoc nsz arcp contract afn <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> zeroinitializer
   ret <16 x bfloat> %2
diff --git a/llvm/test/CodeGen/X86/clang-section-coff.ll b/llvm/test/CodeGen/X86/clang-section-coff.ll
new file mode 100644
index 0000000000000..02381fd256aaa
--- /dev/null
+++ b/llvm/test/CodeGen/X86/clang-section-coff.ll
@@ -0,0 +1,146 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+;RUN: llc -mtriple=x86_64-windows-msvc %s -o - | FileCheck %s
+;Test that global variables and functions are assigned to correct sections.
+
+@a = global i32 0, align 4 #0
+@b = global i32 1, align 4 #0
+@c = global [4 x i32] zeroinitializer, align 4 #0
+@d = global [5 x i16] zeroinitializer, align 2 #0
+@e = global [6 x i16] [i16 0, i16 0, i16 1, i16 0, i16 0, i16 0], align 2 #0
+@f = constant i32 2, align 4 #0
+@h = global i32 0, align 4 #1
+@i = global i32 0, align 4 #2
+@j = constant i32 4, align 4 #2
+@k = global i32 0, align 4 #2
+@_ZZ3gooE7lstat_h = internal global i32 0, align 4 #2
+@_ZL1g = internal global [2 x i32] zeroinitializer, align 4 #0
+@l = global i32 5, align 4 #3
+@m = constant i32 6, align 4 #3
+@n = global i32 0, align 4
+@o = global i32 6, align 4
+@p = constant i32 7, align 4
+
+declare i32 @zoo(ptr, ptr) #6
+
+; Function Attrs: noinline nounwind
+define i32 @hoo() #7 {
+; CHECK-LABEL: hoo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl b(%rip), %eax
+; CHECK-NEXT:    retq
+entry:
+  %0 = load i32, ptr @b, align 4
+  ret i32 %0
+}
+
+attributes #0 = { "bss-section"="my_bss.1" "data-section"="my_data.1" "rodata-section"="my_rodata.1" }
+attributes #1 = { "data-section"="my_data.1" "rodata-section"="my_rodata.1" }
+attributes #2 = { "bss-section"="my_bss.2" "rodata-section"="my_rodata.1" }
+attributes #3 = { "bss-section"="my_bss.2" "data-section"="my_data.2" "rodata-section"="my_rodata.2" }
+attributes #6 = { "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #7 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, !"static_rwdata", i32 1}
+!2 = !{i32 1, !"enumsize_buildattr", i32 2}
+!3 = !{i32 1, !"armlib_unavailable", i32 0}
+
+;CHECK: 	.section	my_bss.1,"bw"
+;CHECK: 	.globl	a                               # @a
+;CHECK: 	.p2align	2, 0x0
+;CHECK: a:
+;CHECK: 	.long	0                               # 0x0
+
+;CHECK: 	.section	my_data.1,"dw"
+;CHECK: 	.globl	b                               # @b
+;CHECK: 	.p2align	2, 0x0
+;CHECK: b:
+;CHECK: 	.long	1                               # 0x1
+
+;CHECK: 	.section	my_bss.1,"bw"
+;CHECK: 	.globl	c                               # @c
+;CHECK: 	.p2align	2, 0x0
+;CHECK: c:
+;CHECK: 	.zero	16
+;CHECK: 	.globl	d                               # @d
+;CHECK: 	.p2align	1, 0x0
+;CHECK: d:
+;CHECK: 	.zero	10
+
+;CHECK: 	.section	my_data.1,"dw"
+;CHECK: 	.globl	e                               # @e
+;CHECK: 	.p2align	1, 0x0
+;CHECK: e:
+;CHECK: 	.short	0                               # 0x0
+;CHECK: 	.short	0                               # 0x0
+;CHECK: 	.short	1                               # 0x1
+;CHECK: 	.short	0                               # 0x0
+;CHECK: 	.short	0                               # 0x0
+;CHECK: 	.short	0                               # 0x0
+
+;CHECK: 	.section	my_rodata.1,"dr"
+;CHECK: 	.globl	f                               # @f
+;CHECK: 	.p2align	2, 0x0
+;CHECK: f:
+;CHECK: 	.long	2                               # 0x2
+;CHECK: 	.bss
+;CHECK: 	.globl	h                               # @h
+;CHECK: 	.p2align	2, 0x0
+;CHECK: h:
+;CHECK: 	.long	0                               # 0x0
+
+;CHECK: 	.section	my_bss.2,"bw"
+;CHECK: 	.globl	i                               # @i
+;CHECK: 	.p2align	2, 0x0
+;CHECK: i:
+;CHECK: 	.long	0                               # 0x0
+
+;CHECK: 	.section	my_rodata.1,"dr"
+;CHECK: 	.globl	j                               # @j
+;CHECK: 	.p2align	2, 0x0
+;CHECK: j:
+;CHECK: 	.long	4                               # 0x4
+
+;CHECK: 	.section	my_bss.2,"bw"
+;CHECK: 	.globl	k                               # @k
+;CHECK: 	.p2align	2, 0x0
+;CHECK: k:
+;CHECK: 	.long	0                               # 0x0
+;CHECK: 	.p2align	2, 0x0                          # @_ZZ3gooE7lstat_h
+;CHECK: _ZZ3gooE7lstat_h:
+;CHECK: 	.long	0                               # 0x0
+
+;CHECK: 	.section	my_bss.1,"bw"
+;CHECK: 	.p2align	2, 0x0                          # @_ZL1g
+;CHECK: _ZL1g:
+;CHECK: 	.zero	8
+
+;CHECK: 	.section	my_data.2,"dw"
+;CHECK: 	.globl	l                               # @l
+;CHECK: 	.p2align	2, 0x0
+;CHECK: l:
+;CHECK: 	.long	5                               # 0x5
+
+;CHECK: 	.section	my_rodata.2,"dr"
+;CHECK: 	.globl	m                               # @m
+;CHECK: 	.p2align	2, 0x0
+;CHECK: m:
+;CHECK: 	.long	6                               # 0x6
+;CHECK: 	.bss
+;CHECK: 	.globl	n                               # @n
+;CHECK: 	.p2align	2, 0x0
+;CHECK: n:
+;CHECK: 	.long	0                               # 0x0
+;CHECK: 	.data
+;CHECK: 	.globl	o                               # @o
+;CHECK: 	.p2align	2, 0x0
+;CHECK: o:
+;CHECK: 	.long	6                               # 0x6
+
+;CHECK: 	.section	.rdata,"dr"
+;CHECK: 	.globl	p                               # @p
+;CHECK: 	.p2align	2, 0x0
+;CHECK: p:
+;CHECK: 	.long	7                               # 0x7
diff --git a/llvm/test/CodeGen/X86/fixup-bw-inst.ll b/llvm/test/CodeGen/X86/fixup-bw-inst.ll
index 6c371e22b4e6e..4301498912003 100644
--- a/llvm/test/CodeGen/X86/fixup-bw-inst.ll
+++ b/llvm/test/CodeGen/X86/fixup-bw-inst.ll
@@ -1,7 +1,6 @@
-; RUN: llc -fixup-byte-word-insts=1 < %s | \
-; RUN: FileCheck -check-prefix CHECK -check-prefix BWON %s
-; RUN: llc -fixup-byte-word-insts=0 < %s | \
-; RUN: FileCheck -check-prefix CHECK -check-prefix BWOFF %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -fixup-byte-word-insts=1 < %s | FileCheck %s -check-prefix=BWON
+; RUN: llc -fixup-byte-word-insts=0 < %s | FileCheck %s -check-prefix=BWOFF
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -11,22 +10,40 @@ target triple = "x86_64-apple-macosx10.8.0"
 ; This has byte loads interspersed with byte stores, in a single
 ; basic-block loop.  The upper portion should be dead, so the movb loads
 ; should have been changed into movzbl instead.
-; CHECK-LABEL: foo1
-; load:
-; BWON:  movzbl
-; BWOFF: movb
-; store:
-; CHECK: movb
-; load:
-; BWON: movzbl
-; BWOFF: movb
-; store:
-; CHECK: movb
-; CHECK: ret
-define void @foo1(i32 %count,
-                  ptr noalias nocapture %q,
-                  ptr noalias nocapture %p)
-                    nounwind uwtable noinline ssp {
+define void @foo1(i32 %count, ptr noalias nocapture %q, ptr noalias nocapture %p) nounwind uwtable noinline ssp {
+; BWON-LABEL: foo1:
+; BWON:       ## %bb.0:
+; BWON-NEXT:    testl %edi, %edi
+; BWON-NEXT:    jle LBB0_2
+; BWON-NEXT:    .p2align 4
+; BWON-NEXT:  LBB0_1: ## %a4
+; BWON-NEXT:    ## =>This Inner Loop Header: Depth=1
+; BWON-NEXT:    movzbl (%rsi), %eax
+; BWON-NEXT:    movb %al, (%rdx)
+; BWON-NEXT:    movzbl 1(%rsi), %eax
+; BWON-NEXT:    movb %al, 1(%rdx)
+; BWON-NEXT:    addq $8, %rdx
+; BWON-NEXT:    decl %edi
+; BWON-NEXT:    jne LBB0_1
+; BWON-NEXT:  LBB0_2: ## %._crit_edge
+; BWON-NEXT:    retq
+;
+; BWOFF-LABEL: foo1:
+; BWOFF:       ## %bb.0:
+; BWOFF-NEXT:    testl %edi, %edi
+; BWOFF-NEXT:    jle LBB0_2
+; BWOFF-NEXT:    .p2align 4
+; BWOFF-NEXT:  LBB0_1: ## %a4
+; BWOFF-NEXT:    ## =>This Inner Loop Header: Depth=1
+; BWOFF-NEXT:    movb (%rsi), %al
+; BWOFF-NEXT:    movb %al, (%rdx)
+; BWOFF-NEXT:    movb 1(%rsi), %al
+; BWOFF-NEXT:    movb %al, 1(%rdx)
+; BWOFF-NEXT:    addq $8, %rdx
+; BWOFF-NEXT:    decl %edi
+; BWOFF-NEXT:    jne LBB0_1
+; BWOFF-NEXT:  LBB0_2: ## %._crit_edge
+; BWOFF-NEXT:    retq
   %1 = icmp sgt i32 %count, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
 
@@ -56,22 +73,40 @@ a4:                                       ; preds = %3, %.lr.ph
 ; This has word loads interspersed with word stores.
 ; The upper portion should be dead, so the movw loads should have
 ; been changed into movzwl instead.
-; CHECK-LABEL: foo2
-; load:
-; BWON:  movzwl
-; BWOFF: movw
-; store:
-; CHECK: movw
-; load:
-; BWON:  movzwl
-; BWOFF: movw
-; store:
-; CHECK: movw
-; CHECK: ret
-define void @foo2(i32 %count,
-                  ptr noalias nocapture %q,
-                  ptr noalias nocapture %p)
-                    nounwind uwtable noinline ssp {
+define void @foo2(i32 %count, ptr noalias nocapture %q, ptr noalias nocapture %p) nounwind uwtable noinline ssp {
+; BWON-LABEL: foo2:
+; BWON:       ## %bb.0:
+; BWON-NEXT:    testl %edi, %edi
+; BWON-NEXT:    jle LBB1_2
+; BWON-NEXT:    .p2align 4
+; BWON-NEXT:  LBB1_1: ## %a4
+; BWON-NEXT:    ## =>This Inner Loop Header: Depth=1
+; BWON-NEXT:    movzwl (%rsi), %eax
+; BWON-NEXT:    movw %ax, (%rdx)
+; BWON-NEXT:    movzwl 2(%rsi), %eax
+; BWON-NEXT:    movw %ax, 2(%rdx)
+; BWON-NEXT:    addq $16, %rdx
+; BWON-NEXT:    decl %edi
+; BWON-NEXT:    jne LBB1_1
+; BWON-NEXT:  LBB1_2: ## %._crit_edge
+; BWON-NEXT:    retq
+;
+; BWOFF-LABEL: foo2:
+; BWOFF:       ## %bb.0:
+; BWOFF-NEXT:    testl %edi, %edi
+; BWOFF-NEXT:    jle LBB1_2
+; BWOFF-NEXT:    .p2align 4
+; BWOFF-NEXT:  LBB1_1: ## %a4
+; BWOFF-NEXT:    ## =>This Inner Loop Header: Depth=1
+; BWOFF-NEXT:    movw (%rsi), %ax
+; BWOFF-NEXT:    movw %ax, (%rdx)
+; BWOFF-NEXT:    movw 2(%rsi), %ax
+; BWOFF-NEXT:    movw %ax, 2(%rdx)
+; BWOFF-NEXT:    addq $16, %rdx
+; BWOFF-NEXT:    decl %edi
+; BWOFF-NEXT:    jne LBB1_1
+; BWOFF-NEXT:  LBB1_2: ## %._crit_edge
+; BWOFF-NEXT:    retq
   %1 = icmp sgt i32 %count, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
 
@@ -98,11 +133,18 @@ a4:                                       ; preds = %3, %.lr.ph
 
 ; This test contains nothing but a simple byte load and store.
 ; movb encodes smaller, but we use movzbl for the load for better perf.
-; CHECK-LABEL: foo3:
-; BWON:  movzbl
-; BWOFF: movb
-; CHECK: movb
 define void @foo3(ptr%dst, ptr%src) {
+; BWON-LABEL: foo3:
+; BWON:       ## %bb.0:
+; BWON-NEXT:    movzbl (%rsi), %eax
+; BWON-NEXT:    movb %al, (%rdi)
+; BWON-NEXT:    retq
+;
+; BWOFF-LABEL: foo3:
+; BWOFF:       ## %bb.0:
+; BWOFF-NEXT:    movb (%rsi), %al
+; BWOFF-NEXT:    movb %al, (%rdi)
+; BWOFF-NEXT:    retq
   %t0 = load i8, ptr%src, align 1
   store i8 %t0, ptr%dst, align 1
   ret void
@@ -111,11 +153,18 @@ define void @foo3(ptr%dst, ptr%src) {
 ; This test contains nothing but a simple word load and store.  Since
 ; movw and movzwl are the same size, we should always choose to use
 ; movzwl instead.
-; CHECK-LABEL: foo4:
-; BWON:  movzwl
-; BWOFF: movw
-; CHECK: movw
 define void @foo4(ptr%dst, ptr%src) {
+; BWON-LABEL: foo4:
+; BWON:       ## %bb.0:
+; BWON-NEXT:    movzwl (%rsi), %eax
+; BWON-NEXT:    movw %ax, (%rdi)
+; BWON-NEXT:    retq
+;
+; BWOFF-LABEL: foo4:
+; BWOFF:       ## %bb.0:
+; BWOFF-NEXT:    movw (%rsi), %ax
+; BWOFF-NEXT:    movw %ax, (%rdi)
+; BWOFF-NEXT:    retq
   %t0 = load i16, ptr%src, align 2
   store i16 %t0, ptr%dst, align 2
   ret void
diff --git a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
index 1dcce5336895f..257524e0d4db5 100644
--- a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
+++ b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
@@ -2519,7 +2519,7 @@ define <4 x bfloat> @test_fmaximum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) {
 ;
 ; AVX10_2-LABEL: test_fmaximum_v4bf16:
 ; AVX10_2:       # %bb.0:
-; AVX10_2-NEXT:    vminmaxnepbf16 $1, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT:    vminmaxbf16 $1, %xmm1, %xmm0, %xmm0
 ; AVX10_2-NEXT:    retq
 ;
 ; X86-LABEL: test_fmaximum_v4bf16:
diff --git a/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll b/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll
index 2e9e8e62b3569..bfff6ef41dbe0 100644
--- a/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll
+++ b/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll
@@ -2647,7 +2647,7 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n
 ;
 ; AVX10_2-LABEL: test_fmaximumnum_v4bf16:
 ; AVX10_2:       # %bb.0:
-; AVX10_2-NEXT:    vminmaxnepbf16 $17, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT:    vminmaxbf16 $17, %xmm1, %xmm0, %xmm0
 ; AVX10_2-NEXT:    retq
 ;
 ; X86-LABEL: test_fmaximumnum_v4bf16:
diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll
index 1bca1b960edda..203be56751d09 100644
--- a/llvm/test/CodeGen/X86/opt-pipeline.ll
+++ b/llvm/test/CodeGen/X86/opt-pipeline.ll
@@ -58,6 +58,9 @@
 ; CHECK-NEXT:       Block Frequency Analysis
 ; CHECK-NEXT:       Constant Hoisting
 ; CHECK-NEXT:       Replace intrinsics with calls to vector library
+; CHECK-NEXT:       Lazy Branch Probability Analysis
+; CHECK-NEXT:       Lazy Block Frequency Analysis
+; CHECK-NEXT:       Optimization Remark Emitter
 ; CHECK-NEXT:       Partially inline calls to library functions
 ; CHECK-NEXT:       Instrument function entry/exit with calls to e.g. mcount() (post inlining)
 ; CHECK-NEXT:       Scalarize Masked Memory Intrinsics
diff --git a/llvm/test/CodeGen/X86/stack-clash-extra-huge.ll b/llvm/test/CodeGen/X86/stack-clash-extra-huge.ll
new file mode 100644
index 0000000000000..59cbcd0689fbf
--- /dev/null
+++ b/llvm/test/CodeGen/X86/stack-clash-extra-huge.ll
@@ -0,0 +1,82 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp
+; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X64 %s
+; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86 %s
+; RUN: llc -mtriple=x86_64-linux-gnux32 < %s | FileCheck -check-prefix=CHECK-X32 %s
+
+define i32 @foo() local_unnamed_addr #0 {
+; CHECK-X64-LABEL: foo:
+; CHECK-X64:       # %bb.0:
+; CHECK-X64-NEXT:    movabsq $-4799995904, %r11 # imm = 0xFFFFFFFEE1E5E000
+; CHECK-X64-NEXT:    addq %rsp, %r11
+; CHECK-X64-NEXT:    .cfi_def_cfa_register %r11
+; CHECK-X64-NEXT:    .cfi_adjust_cfa_offset 4799995904
+; CHECK-X64-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
+; CHECK-X64-NEXT:    subq $4096, %rsp # imm = 0x1000
+; CHECK-X64-NEXT:    movq $0, (%rsp)
+; CHECK-X64-NEXT:    cmpq %r11, %rsp
+; CHECK-X64-NEXT:    jne .LBB0_1
+; CHECK-X64-NEXT:  # %bb.2:
+; CHECK-X64-NEXT:    subq $3976, %rsp # imm = 0xF88
+; CHECK-X64-NEXT:    .cfi_def_cfa_register %rsp
+; CHECK-X64-NEXT:    .cfi_def_cfa_offset 4799999888
+; CHECK-X64-NEXT:    movl $1, 264(%rsp)
+; CHECK-X64-NEXT:    movl $1, 28664(%rsp)
+; CHECK-X64-NEXT:    movl -128(%rsp), %eax
+; CHECK-X64-NEXT:    movabsq $4799999880, %rcx # imm = 0x11E1A2F88
+; CHECK-X64-NEXT:    addq %rcx, %rsp
+; CHECK-X64-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-X64-NEXT:    retq
+;
+; CHECK-X86-LABEL: foo:
+; CHECK-X86:       # %bb.0:
+; CHECK-X86-NEXT:    ud2
+; CHECK-X86-NEXT:    .cfi_def_cfa_register %eax
+; CHECK-X86-NEXT:    .cfi_adjust_cfa_offset 4800000000
+; CHECK-X86-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
+; CHECK-X86-NEXT:    subl $4096, %esp # imm = 0x1000
+; CHECK-X86-NEXT:    movl $0, (%esp)
+; CHECK-X86-NEXT:    cmpl %eax, %esp
+; CHECK-X86-NEXT:    jne .LBB0_1
+; CHECK-X86-NEXT:  # %bb.2:
+; CHECK-X86-NEXT:    subl $12, %esp
+; CHECK-X86-NEXT:    .cfi_def_cfa_register %esp
+; CHECK-X86-NEXT:    .cfi_def_cfa_offset 4800000016
+; CHECK-X86-NEXT:    movl $1, 392(%esp)
+; CHECK-X86-NEXT:    movl $1, 28792(%esp)
+; CHECK-X86-NEXT:    movl (%esp), %eax
+; CHECK-X86-NEXT:    movl $4800000012, %ecx # imm = 0x11E1A300C
+; CHECK-X86-NEXT:    addl %ecx, %esp
+; CHECK-X86-NEXT:    .cfi_def_cfa_offset 4
+; CHECK-X86-NEXT:    retl
+;
+; CHECK-X32-LABEL: foo:
+; CHECK-X32:       # %bb.0:
+; CHECK-X32-NEXT:    ud2
+; CHECK-X32-NEXT:    .cfi_def_cfa_register %r11
+; CHECK-X32-NEXT:    .cfi_adjust_cfa_offset 4799995904
+; CHECK-X32-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
+; CHECK-X32-NEXT:    subl $4096, %esp # imm = 0x1000
+; CHECK-X32-NEXT:    movq $0, (%esp)
+; CHECK-X32-NEXT:    cmpl %r11d, %esp
+; CHECK-X32-NEXT:    jne .LBB0_1
+; CHECK-X32-NEXT:  # %bb.2:
+; CHECK-X32-NEXT:    subl $3976, %esp # imm = 0xF88
+; CHECK-X32-NEXT:    .cfi_def_cfa_register %rsp
+; CHECK-X32-NEXT:    .cfi_def_cfa_offset 4799999888
+; CHECK-X32-NEXT:    movl $1, 264(%esp)
+; CHECK-X32-NEXT:    movl $1, 28664(%esp)
+; CHECK-X32-NEXT:    movl -128(%esp), %eax
+; CHECK-X32-NEXT:    movabsq $4799999880, %rcx # imm = 0x11E1A2F88
+; CHECK-X32-NEXT:    addq %rcx, %esp
+; CHECK-X32-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-X32-NEXT:    retq
+  %a = alloca i32, i64 1200000000, align 16
+  %b0 = getelementptr inbounds i32, ptr %a, i64 98
+  %b1 = getelementptr inbounds i32, ptr %a, i64 7198
+  store volatile i32 1, ptr %b0
+  store volatile i32 1, ptr %b1
+  %c = load volatile i32, ptr %a
+  ret i32 %c
+}
+
+attributes #0 =  {"probe-stack"="inline-asm"}
diff --git a/llvm/test/CodeGen/X86/stack-clash-huge.ll b/llvm/test/CodeGen/X86/stack-clash-huge.ll
new file mode 100644
index 0000000000000..03f028dfc2506
--- /dev/null
+++ b/llvm/test/CodeGen/X86/stack-clash-huge.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp
+; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X64 %s
+; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86 %s
+; RUN: llc -mtriple=x86_64-linux-gnux32 < %s | FileCheck -check-prefix=CHECK-X32 %s
+
+define i32 @foo() local_unnamed_addr #0 {
+; CHECK-X64-LABEL: foo:
+; CHECK-X64:       # %bb.0:
+; CHECK-X64-NEXT:    movabsq $-2399997952, %r11 # imm = 0xFFFFFFFF70F2F000
+; CHECK-X64-NEXT:    addq %rsp, %r11
+; CHECK-X64-NEXT:    .cfi_def_cfa_register %r11
+; CHECK-X64-NEXT:    .cfi_adjust_cfa_offset 2399997952
+; CHECK-X64-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
+; CHECK-X64-NEXT:    subq $4096, %rsp # imm = 0x1000
+; CHECK-X64-NEXT:    movq $0, (%rsp)
+; CHECK-X64-NEXT:    cmpq %r11, %rsp
+; CHECK-X64-NEXT:    jne .LBB0_1
+; CHECK-X64-NEXT:  # %bb.2:
+; CHECK-X64-NEXT:    subq $1928, %rsp # imm = 0x788
+; CHECK-X64-NEXT:    .cfi_def_cfa_register %rsp
+; CHECK-X64-NEXT:    .cfi_def_cfa_offset 2399999888
+; CHECK-X64-NEXT:    movl $1, 264(%rsp)
+; CHECK-X64-NEXT:    movl $1, 28664(%rsp)
+; CHECK-X64-NEXT:    movl -128(%rsp), %eax
+; CHECK-X64-NEXT:    movl $2399999880, %ecx # imm = 0x8F0D1788
+; CHECK-X64-NEXT:    addq %rcx, %rsp
+; CHECK-X64-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-X64-NEXT:    retq
+;
+; CHECK-X86-LABEL: foo:
+; CHECK-X86:       # %bb.0:
+; CHECK-X86-NEXT:    movl %esp, %eax
+; CHECK-X86-NEXT:    subl $2399997952, %eax # imm = 0x8F0D1000
+; CHECK-X86-NEXT:    .cfi_def_cfa_register %eax
+; CHECK-X86-NEXT:    .cfi_adjust_cfa_offset 2399997952
+; CHECK-X86-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
+; CHECK-X86-NEXT:    subl $4096, %esp # imm = 0x1000
+; CHECK-X86-NEXT:    movl $0, (%esp)
+; CHECK-X86-NEXT:    cmpl %eax, %esp
+; CHECK-X86-NEXT:    jne .LBB0_1
+; CHECK-X86-NEXT:  # %bb.2:
+; CHECK-X86-NEXT:    subl $2060, %esp # imm = 0x80C
+; CHECK-X86-NEXT:    .cfi_def_cfa_register %esp
+; CHECK-X86-NEXT:    .cfi_def_cfa_offset 2400000016
+; CHECK-X86-NEXT:    movl $1, 392(%esp)
+; CHECK-X86-NEXT:    movl $1, 28792(%esp)
+; CHECK-X86-NEXT:    movl (%esp), %eax
+; CHECK-X86-NEXT:    movl $2400000012, %ecx # imm = 0x8F0D180C
+; CHECK-X86-NEXT:    addl %ecx, %esp
+; CHECK-X86-NEXT:    .cfi_def_cfa_offset 4
+; CHECK-X86-NEXT:    retl
+;
+; CHECK-X32-LABEL: foo:
+; CHECK-X32:       # %bb.0:
+; CHECK-X32-NEXT:    movl %esp, %r11d
+; CHECK-X32-NEXT:    subl $2399997952, %r11d # imm = 0x8F0D1000
+; CHECK-X32-NEXT:    .cfi_def_cfa_register %r11
+; CHECK-X32-NEXT:    .cfi_adjust_cfa_offset 2399997952
+; CHECK-X32-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
+; CHECK-X32-NEXT:    subl $4096, %esp # imm = 0x1000
+; CHECK-X32-NEXT:    movq $0, (%esp)
+; CHECK-X32-NEXT:    cmpl %r11d, %esp
+; CHECK-X32-NEXT:    jne .LBB0_1
+; CHECK-X32-NEXT:  # %bb.2:
+; CHECK-X32-NEXT:    subl $1928, %esp # imm = 0x788
+; CHECK-X32-NEXT:    .cfi_def_cfa_register %rsp
+; CHECK-X32-NEXT:    .cfi_def_cfa_offset 2399999888
+; CHECK-X32-NEXT:    movl $1, 264(%esp)
+; CHECK-X32-NEXT:    movl $1, 28664(%esp)
+; CHECK-X32-NEXT:    movl -128(%esp), %eax
+; CHECK-X32-NEXT:    movl $2399999880, %ecx # imm = 0x8F0D1788
+; CHECK-X32-NEXT:    addq %rcx, %esp
+; CHECK-X32-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-X32-NEXT:    retq
+  %a = alloca i32, i64 600000000, align 16
+  %b0 = getelementptr inbounds i32, ptr %a, i64 98
+  %b1 = getelementptr inbounds i32, ptr %a, i64 7198
+  store volatile i32 1, ptr %b0
+  store volatile i32 1, ptr %b1
+  %c = load volatile i32, ptr %a
+  ret i32 %c
+}
+
+attributes #0 =  {"probe-stack"="inline-asm"}
diff --git a/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll
index 28ac4496acb9b..97cc1f8a15694 100644
--- a/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll
@@ -141,8 +141,10 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
 ; SSE2-NEXT:    pmuludq %xmm1, %xmm0
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE2-NEXT:    movl $1463, %eax # imm = 0x5B7
+; SSE2-NEXT:    movd %eax, %xmm3
+; SSE2-NEXT:    pmuludq %xmm1, %xmm3
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2047,2047,2047,2047]
 ; SSE2-NEXT:    movdqa %xmm0, %xmm3
diff --git a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
index a17b5a1e8f3e0..36094fe56d577 100644
--- a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
+++ b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
@@ -121,7 +121,7 @@ define <4 x i1> @t1_all_odd_ne(<4 x i32> %X) nounwind {
 ; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
 ; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
-; CHECK-AVX512VL-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT:    vpternlogq {{.*#+}} xmm0 = ~xmm0
 ; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
 ; CHECK-AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
 ; CHECK-AVX512VL-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vec_smulo.ll b/llvm/test/CodeGen/X86/vec_smulo.ll
index 7e081310c35be..49cb7c707a14f 100644
--- a/llvm/test/CodeGen/X86/vec_smulo.ll
+++ b/llvm/test/CodeGen/X86/vec_smulo.ll
@@ -474,8 +474,6 @@ define <6 x i32> @smulo_v6i32(<6 x i32> %a0, <6 x i32> %a1, ptr %p2) nounwind {
 ; SSE2-NEXT:    pcmpgtd %xmm3, %xmm6
 ; SSE2-NEXT:    pand %xmm7, %xmm6
 ; SSE2-NEXT:    paddd %xmm8, %xmm6
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
 ; SSE2-NEXT:    pmuludq %xmm2, %xmm1
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
@@ -548,8 +546,6 @@ define <6 x i32> @smulo_v6i32(<6 x i32> %a0, <6 x i32> %a1, ptr %p2) nounwind {
 ; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm6
 ; SSSE3-NEXT:    pand %xmm7, %xmm6
 ; SSSE3-NEXT:    paddd %xmm8, %xmm6
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
 ; SSSE3-NEXT:    pmuludq %xmm2, %xmm1
 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
@@ -578,25 +574,23 @@ define <6 x i32> @smulo_v6i32(<6 x i32> %a0, <6 x i32> %a1, ptr %p2) nounwind {
 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
 ; SSE41-NEXT:    pmuldq %xmm2, %xmm0
 ; SSE41-NEXT:    pinsrd $3, %r8d, %xmm2
-; SSE41-NEXT:    movl {{[0-9]+}}(%rsp), %edx
+; SSE41-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
 ; SSE41-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
 ; SSE41-NEXT:    movd %r9d, %xmm4
 ; SSE41-NEXT:    movdqa %xmm4, %xmm5
 ; SSE41-NEXT:    pmuldq %xmm3, %xmm4
-; SSE41-NEXT:    pinsrd $1, %edx, %xmm3
-; SSE41-NEXT:    movl {{[0-9]+}}(%rsp), %esi
-; SSE41-NEXT:    pinsrd $1, %esi, %xmm5
+; SSE41-NEXT:    pinsrd $1, %ecx, %xmm3
+; SSE41-NEXT:    movl {{[0-9]+}}(%rsp), %edx
+; SSE41-NEXT:    pinsrd $1, %edx, %xmm5
 ; SSE41-NEXT:    pmulld %xmm3, %xmm5
 ; SSE41-NEXT:    pinsrd $3, {{[0-9]+}}(%rsp), %xmm1
-; SSE41-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
-; SSE41-NEXT:    movd %edx, %xmm3
-; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
-; SSE41-NEXT:    movd %esi, %xmm6
-; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[0,0,0,0]
+; SSE41-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
+; SSE41-NEXT:    movd %ecx, %xmm3
+; SSE41-NEXT:    movd %edx, %xmm6
 ; SSE41-NEXT:    pmuldq %xmm3, %xmm6
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
 ; SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm6[2,3],xmm3[4,5],xmm6[6,7]
-; SSE41-NEXT:    movq %xmm5, 16(%rcx)
+; SSE41-NEXT:    movq %xmm5, 16(%rsi)
 ; SSE41-NEXT:    psrad $31, %xmm5
 ; SSE41-NEXT:    pcmpeqd %xmm3, %xmm5
 ; SSE41-NEXT:    pcmpeqd %xmm3, %xmm3
@@ -607,7 +601,7 @@ define <6 x i32> @smulo_v6i32(<6 x i32> %a0, <6 x i32> %a1, ptr %p2) nounwind {
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm6[2,3],xmm0[4,5],xmm6[6,7]
 ; SSE41-NEXT:    pmulld %xmm2, %xmm1
-; SSE41-NEXT:    movdqa %xmm1, (%rcx)
+; SSE41-NEXT:    movdqa %xmm1, (%rsi)
 ; SSE41-NEXT:    psrad $31, %xmm1
 ; SSE41-NEXT:    pcmpeqd %xmm0, %xmm1
 ; SSE41-NEXT:    pxor %xmm3, %xmm1
diff --git a/llvm/test/CodeGen/X86/vec_umulo.ll b/llvm/test/CodeGen/X86/vec_umulo.ll
index 68c6ca93576b7..62db6d234d301 100644
--- a/llvm/test/CodeGen/X86/vec_umulo.ll
+++ b/llvm/test/CodeGen/X86/vec_umulo.ll
@@ -394,8 +394,8 @@ define <6 x i32> @umulo_v6i32(<6 x i32> %a0, <6 x i32> %a1, ptr %p2) nounwind {
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = mem[0,0,0,0]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = mem[0,0,0,0]
+; SSE2-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE2-NEXT:    movd {{.*#+}} xmm6 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    pmuludq %xmm2, %xmm6
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[1,3,2,3]
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm0[1,3,2,3]
@@ -444,8 +444,8 @@ define <6 x i32> @umulo_v6i32(<6 x i32> %a0, <6 x i32> %a1, ptr %p2) nounwind {
 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = mem[0,0,0,0]
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = mem[0,0,0,0]
+; SSSE3-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSSE3-NEXT:    movd {{.*#+}} xmm6 = mem[0],zero,zero,zero
 ; SSSE3-NEXT:    pmuludq %xmm2, %xmm6
 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[1,3,2,3]
 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm0[1,3,2,3]
@@ -492,9 +492,7 @@ define <6 x i32> @umulo_v6i32(<6 x i32> %a0, <6 x i32> %a1, ptr %p2) nounwind {
 ; SSE41-NEXT:    pcmpeqd %xmm6, %xmm6
 ; SSE41-NEXT:    pxor %xmm6, %xmm3
 ; SSE41-NEXT:    movd %edi, %xmm7
-; SSE41-NEXT:    pshufd {{.*#+}} xmm7 = xmm7[0,0,0,0]
 ; SSE41-NEXT:    movd %r9d, %xmm8
-; SSE41-NEXT:    pshufd {{.*#+}} xmm8 = xmm8[0,0,0,0]
 ; SSE41-NEXT:    pmuludq %xmm7, %xmm8
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
 ; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm8[2,3],xmm1[4,5],xmm8[6,7]
diff --git a/llvm/test/CodeGen/X86/widen_shuffle-1.ll b/llvm/test/CodeGen/X86/widen_shuffle-1.ll
index 3257936f62e3b..3d34205096afe 100644
--- a/llvm/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/llvm/test/CodeGen/X86/widen_shuffle-1.ll
@@ -105,14 +105,13 @@ define void @shuf5(ptr %p) nounwind {
 ; X86-LABEL: shuf5:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movsd {{.*#+}} xmm0 = [33,33,33,33,33,33,33,33,0,0,0,0,0,0,0,0]
+; X86-NEXT:    movsd {{.*#+}} xmm0 = [33,33,u,u,u,u,u,u,0,0,u,u,u,u,u,u]
 ; X86-NEXT:    movsd %xmm0, (%eax)
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: shuf5:
 ; X64:       # %bb.0:
-; X64-NEXT:    movabsq $2387225703656530209, %rax # imm = 0x2121212121212121
-; X64-NEXT:    movq %rax, (%rdi)
+; X64-NEXT:    movq $8481, (%rdi) # imm = 0x2121
 ; X64-NEXT:    retq
   %v = shufflevector <2 x i8> <i8 4, i8 33>, <2 x i8> poison, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   store <8 x i8> %v, ptr %p, align 8
diff --git a/llvm/test/MC/AArch64/spe.s b/llvm/test/MC/AArch64/spe.s
index a4b2a555621fe..570ce6704502b 100644
--- a/llvm/test/MC/AArch64/spe.s
+++ b/llvm/test/MC/AArch64/spe.s
@@ -1,5 +1,5 @@
 // RUN: llvm-mc -triple aarch64 -mattr +spe-eef -show-encoding %s 2>%t | FileCheck %s
-// RUN: llvm-mc -triple aarch64 -mattr +v8.7a -show-encoding %s 2>%t | FileCheck %s
+// RUN: not llvm-mc -triple aarch64 -mattr +v8.7a %s 2>&1 | FileCheck --check-prefix=CHECK-NO-SPE-EEF-ERR %s
 // RUN: not llvm-mc -triple aarch64 < %s 2>&1 | FileCheck --check-prefix=CHECK-NO-SPE-EEF-ERR %s
 
 msr PMSNEVFR_EL1, x0
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopc.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopc.s
index 8745e81707b0e..7ddc3588041fe 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopc.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopc.s
@@ -4,112 +4,127 @@
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
 
-v_cmp_class_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_class_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_class_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_class_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_class_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_class_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_class_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_class_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_class_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_class_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_class_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_class_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_class_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_class_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_class_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_class_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_class_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_class_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_class_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_class_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_class_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_class_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_class_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_class_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_class_f16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+v_cmp_class_f16_e64_dpp vcc_hi, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_class_f16_e64_dpp vcc_hi, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_class_f16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+v_cmp_class_f16_e64_dpp ttmp15, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_class_f16_e64_dpp ttmp15, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_class_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_class_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_class_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_class_f16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+v_cmp_class_f16_e64_dpp vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_class_f16_e64_dpp vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_class_f16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+v_cmp_class_f16_e64_dpp ttmp[14:15], v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_class_f16_e64_dpp ttmp[14:15], v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp null, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_cmp_class_f16_e64_dpp null, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x05,0x30]
+v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x05,0x30]
+
+v_cmp_class_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_class_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp ttmp15, v1.h, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_class_f16_e64_dpp ttmp15, v1.h, v2.l op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x08,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp ttmp[14:15], v1.h, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_class_f16_e64_dpp ttmp[14:15], v1.h, v2.l op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x08,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.h row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.h op_sel:[0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x11,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x05,0x30]
 
 v_cmp_class_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_class_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -218,112 +233,127 @@ v_cmp_class_f32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0
 v_cmp_class_f32_e64_dpp null, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: v_cmp_class_f32_e64_dpp null, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x01,0x7e,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x05,0x30]
 
-v_cmp_eq_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_eq_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_eq_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_eq_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_eq_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x02,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_eq_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_eq_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x02,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_eq_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_eq_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_eq_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_eq_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_eq_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x02,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_eq_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_eq_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x02,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_eq_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_eq_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x02,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp ttmp15, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_eq_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x0a,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x0a,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x93,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
 v_cmp_eq_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_eq_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -890,112 +920,127 @@ v_cmp_eq_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3
 v_cmp_eq_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: v_cmp_eq_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x4a,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
 
-v_cmp_f_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_f_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_f_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_f_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_f_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_f_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_f_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_f_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_f_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_f_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_f_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_f_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_f_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_f_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_f_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_f_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_f_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_f_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_f_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_f_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_f_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_f_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_f_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_f_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_f_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_f_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_f_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_f_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_f_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_f_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_f_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_f_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_f_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_f_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_f_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x00,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_f_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_f_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x00,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_f_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x00,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_f_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_f_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x00,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_f_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_f_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_f_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_f_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x00,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_f_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_f_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x00,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_f_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x00,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_f_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_f_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x00,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_cmp_f_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x00,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmp_f_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_f_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x00,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_f_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_f_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x00,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_f_f16_e64_dpp ttmp15, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_f_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x0a,0x00,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_f_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_f_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x0a,0x00,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_f_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_f_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x93,0x00,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
 v_cmp_f_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_f_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -1318,112 +1363,127 @@ v_cmp_f_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 b
 v_cmp_f_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: v_cmp_f_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x48,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
 
-v_cmp_ge_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_ge_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_ge_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_ge_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_ge_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x06,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_ge_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_ge_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x06,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_ge_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_ge_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_ge_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_ge_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_ge_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x06,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_ge_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_ge_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x06,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_ge_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_ge_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x06,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp ttmp15, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_ge_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x0a,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x0a,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x93,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
 v_cmp_ge_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_ge_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -1990,112 +2050,127 @@ v_cmp_ge_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3
 v_cmp_ge_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: v_cmp_ge_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x4e,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
 
-v_cmp_gt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_gt_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_gt_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_gt_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_gt_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x04,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_gt_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_gt_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x04,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_gt_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_gt_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_gt_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_gt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_gt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x04,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_gt_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_gt_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x04,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_gt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_gt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x04,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp ttmp15, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_gt_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x0a,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x0a,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x93,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
 v_cmp_gt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_gt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -2662,112 +2737,127 @@ v_cmp_gt_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3
 v_cmp_gt_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: v_cmp_gt_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x4c,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
 
-v_cmp_le_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_le_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_le_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_le_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_le_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_le_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_le_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_le_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_le_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_le_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_le_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_le_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_le_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_le_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_le_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_le_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_le_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_le_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_le_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_le_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_le_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_le_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_le_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_le_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x03,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_le_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_le_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x03,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_le_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_le_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_le_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_le_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_le_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_le_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x03,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_le_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_le_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x03,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_le_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_le_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_le_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x03,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp ttmp15, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_le_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x0a,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x0a,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x93,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
 v_cmp_le_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_le_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -3334,112 +3424,127 @@ v_cmp_le_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3
 v_cmp_le_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: v_cmp_le_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x4b,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
 
-v_cmp_lg_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_lg_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_lg_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_lg_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_lg_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x05,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_lg_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_lg_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x05,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_lg_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_lg_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_lg_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_lg_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lg_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_lg_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x05,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_lg_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_lg_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x05,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_lg_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_lg_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x05,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp ttmp15, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_lg_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x0a,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x0a,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x93,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
 v_cmp_lg_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_lg_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -4693,112 +4798,127 @@ v_cmp_ne_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3
 v_cmp_ne_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: v_cmp_ne_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x4d,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
 
-v_cmp_neq_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_neq_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_neq_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_neq_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_neq_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_neq_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_neq_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_neq_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_neq_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_neq_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_neq_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_neq_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_neq_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_neq_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_neq_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_neq_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_neq_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp ttmp15, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_neq_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x0a,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x0a,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x93,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
 v_cmp_neq_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_neq_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -4907,112 +5027,127 @@ v_cmp_neq_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:
 v_cmp_neq_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: v_cmp_neq_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x1d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
-v_cmp_nge_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_nge_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_nge_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_nge_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_nge_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x09,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_nge_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nge_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x09,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_nge_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_nge_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nge_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_nge_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nge_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_nge_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x09,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_nge_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_nge_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x09,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_nge_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nge_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x09,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp ttmp15, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nge_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x0a,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x0a,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x93,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
 v_cmp_nge_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_nge_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -5121,112 +5256,127 @@ v_cmp_nge_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:
 v_cmp_nge_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: v_cmp_nge_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x19,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
-v_cmp_ngt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_ngt_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_ngt_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_ngt_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_ngt_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_ngt_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_ngt_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_ngt_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_ngt_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_ngt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ngt_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_ngt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_ngt_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_ngt_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_ngt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp ttmp15, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_ngt_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x0a,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x0a,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x93,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
 v_cmp_ngt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_ngt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -5335,112 +5485,127 @@ v_cmp_ngt_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:
 v_cmp_ngt_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: v_cmp_ngt_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x1b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
-v_cmp_nle_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_nle_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_nle_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_nle_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_nle_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_nle_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nle_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_nle_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_nle_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nle_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_nle_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nle_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_nle_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_nle_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_nle_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_nle_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nle_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp ttmp15, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nle_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x0a,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x0a,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x93,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
 v_cmp_nle_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_nle_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -5549,112 +5714,127 @@ v_cmp_nle_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:
 v_cmp_nle_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: v_cmp_nle_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x1c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
-v_cmp_nlg_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_nlg_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_nlg_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_nlg_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_nlg_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nlg_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_nlg_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_nlg_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nlg_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_nlg_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlg_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_nlg_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_nlg_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_nlg_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_nlg_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp ttmp15, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nlg_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x0a,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x0a,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x93,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
 v_cmp_nlg_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_nlg_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -5763,112 +5943,127 @@ v_cmp_nlg_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:
 v_cmp_nlg_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: v_cmp_nlg_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x1a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
-v_cmp_nlt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_nlt_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_nlt_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_nlt_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_nlt_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nlt_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_nlt_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_nlt_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nlt_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_nlt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlt_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_nlt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_nlt_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_nlt_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_nlt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp ttmp15, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nlt_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x0a,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x0a,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x93,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
 v_cmp_nlt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_nlt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -5977,112 +6172,127 @@ v_cmp_nlt_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:
 v_cmp_nlt_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: v_cmp_nlt_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x1e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
-v_cmp_o_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_o_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_o_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_o_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_o_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_o_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_o_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_o_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_o_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_o_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_o_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_o_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_o_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_o_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_o_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_o_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_o_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_o_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_o_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_o_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_o_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_o_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_o_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_o_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_o_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x07,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_o_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_o_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x07,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_o_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_o_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_o_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_o_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_o_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_o_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_o_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x07,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_o_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_o_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x07,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_o_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_o_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_o_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x07,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp ttmp15, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_o_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x0a,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x0a,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x93,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
 v_cmp_o_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_o_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -6191,112 +6401,116 @@ v_cmp_o_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x
 v_cmp_o_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: v_cmp_o_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x17,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
-v_cmp_t_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_t_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_t_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_t_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_t_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_t_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_t_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_t_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_t_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_t_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_t_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_t_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_t_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_t_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_t_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_t_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_t_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_t_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_t_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_t_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_t_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_t_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_t_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_t_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_t_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_t_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_t_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_t_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_t_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_t_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_t_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_t_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_t_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_t_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_t_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0f,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_t_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_t_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0f,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_t_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0f,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_t_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_t_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0f,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_t_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_t_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_t_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_t_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0f,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_t_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_t_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0f,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_t_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0f,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_t_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_t_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0f,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_cmp_t_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0f,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmp_t_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_t_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0f,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_t_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_t_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0f,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
 v_cmp_t_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_t_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -6619,112 +6833,123 @@ v_cmp_t_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 b
 v_cmp_t_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: v_cmp_t_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x4f,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
 
-v_cmp_tru_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_t_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_tru_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_t_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_t_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_tru_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_t_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_t_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_tru_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_t_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_t_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_tru_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_t_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_t_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_tru_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_t_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_t_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_tru_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_t_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_t_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_tru_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_t_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_t_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_tru_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_t_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_t_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_tru_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_t_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_t_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_tru_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_t_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_t_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_tru_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_t_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_t_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0f,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_tru_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_t_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0f,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_t_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0f,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_tru_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_t_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0f,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_tru_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_tru_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_tru_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_tru_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_tru_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_tru_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_tru_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_tru_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_tru_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_tru_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_t_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_tru_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_t_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_t_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0f,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_tru_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_t_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0f,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_t_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0f,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_tru_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_t_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0f,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_cmp_t_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0f,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmp_tru_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_t_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0f,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_t_f16_e64_dpp ttmp15, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_t_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x0a,0x0f,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_t_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_t_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x0a,0x0f,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_t_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_t_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x93,0x0f,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
 v_cmp_tru_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_t_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -6833,112 +7058,127 @@ v_cmp_tru_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:
 v_cmp_tru_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: v_cmp_t_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x1f,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
-v_cmp_u_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_u_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_u_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_u_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_u_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_u_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_u_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_u_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_u_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_u_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_u_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_u_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_u_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_u_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_u_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_u_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_u_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_u_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_u_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_u_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_u_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_u_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_u_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_u_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_u_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x08,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_u_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_u_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x08,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_u_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_u_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_u_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_u_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_u_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_u_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_u_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x08,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_u_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_u_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x08,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_u_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_u_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_u_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x08,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp ttmp15, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_u_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x0a,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x0a,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x93,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
 v_cmp_u_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_u_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vopc.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vopc.s
index 8b512123b354a..5012cfab550cf 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vopc.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vopc.s
@@ -4,44 +4,55 @@
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
 
-v_cmp_class_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_class_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_class_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_class_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_class_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_class_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_class_f16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_class_f16_e64_dpp vcc_hi, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp vcc_hi, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_class_f16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x7d,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_class_f16_e64_dpp ttmp15, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_class_f16_e64_dpp ttmp15, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x7d,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_class_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_class_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_class_f16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_class_f16_e64_dpp vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_class_f16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x7d,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_class_f16_e64_dpp ttmp[14:15], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_class_f16_e64_dpp ttmp[14:15], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x7d,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp null, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: v_cmp_class_f16_e64_dpp null, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x01,0x7d,0xd4,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x01,0x7d,0xd4,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+
+v_cmp_class_f16_e64_dpp ttmp15, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_class_f16_e64_dpp ttmp15, v1.h, v2.l op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x08,0x7d,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp ttmp[14:15], v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_class_f16_e64_dpp ttmp[14:15], v1.h, v2.l op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x08,0x7d,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.h op_sel:[0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x11,0x7d,0xd4,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
 
 v_cmp_class_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_class_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -82,44 +93,59 @@ v_cmp_class_f32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmp_class_f32_e64_dpp null, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX11: v_cmp_class_f32_e64_dpp null, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x01,0x7e,0xd4,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
 
-v_cmp_eq_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_eq_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_eq_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x02,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x02,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_eq_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x02,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_eq_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x02,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_eq_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x02,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_eq_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x02,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x02,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x02,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x02,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_eq_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x02,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp ttmp15, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_eq_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x0a,0x02,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x0a,0x02,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x02,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x93,0x02,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 v_cmp_eq_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_eq_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -338,44 +364,59 @@ v_cmp_eq_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmp_eq_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX11: v_cmp_eq_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x4a,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
 
-v_cmp_f_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_f_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_f_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_f_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_f_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_f_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_f_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_f_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_f_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_f_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_f_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x00,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_f_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_f_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x00,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_f_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x00,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_f_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_f_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x00,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_f_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_f_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_f_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_f_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_f_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x00,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_f_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x00,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_f_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_f_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x00,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_f_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x00,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_f_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_f_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x00,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_f_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_f_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x00,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_f_f16_e64_dpp ttmp15, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_f_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x0a,0x00,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_f_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_f_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x0a,0x00,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_f_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: v_cmp_f_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x00,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cmp_f_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_f_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x93,0x00,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 v_cmp_f_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_f_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x10,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -494,44 +535,59 @@ v_cmp_f_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmp_f_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX11: v_cmp_f_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x48,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
 
-v_cmp_ge_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ge_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ge_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x06,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x06,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_ge_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x06,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ge_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x06,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ge_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x06,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ge_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x06,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x06,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x06,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x06,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_ge_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x06,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp ttmp15, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ge_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x0a,0x06,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x0a,0x06,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x06,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x93,0x06,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 v_cmp_ge_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_ge_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x16,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -750,44 +806,59 @@ v_cmp_ge_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmp_ge_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX11: v_cmp_ge_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x4e,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
 
-v_cmp_gt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_gt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_gt_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x04,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x04,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_gt_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x04,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_gt_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x04,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_gt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_gt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x04,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x04,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x04,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x04,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x04,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x04,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_gt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x04,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp ttmp15, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_gt_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x0a,0x04,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x0a,0x04,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x93,0x04,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 v_cmp_gt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_gt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x14,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -1006,44 +1077,59 @@ v_cmp_gt_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmp_gt_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX11: v_cmp_gt_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x4c,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
 
-v_cmp_le_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_le_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_le_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_le_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_le_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_le_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_le_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x03,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_le_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x03,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_le_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x03,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_le_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_le_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x03,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_le_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_le_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x03,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_le_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x03,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_le_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x03,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x03,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x03,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_le_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x03,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp ttmp15, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_le_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x0a,0x03,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x0a,0x03,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x03,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x93,0x03,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 v_cmp_le_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_le_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -1262,44 +1348,59 @@ v_cmp_le_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmp_le_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX11: v_cmp_le_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x4b,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
 
-v_cmp_lg_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_lg_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_lg_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x05,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x05,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_lg_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x05,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_lg_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x05,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_lg_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_lg_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x05,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x05,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x05,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x05,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x05,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x05,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_lg_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x05,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp ttmp15, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_lg_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x0a,0x05,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x0a,0x05,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x93,0x05,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 v_cmp_lg_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_lg_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x15,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -1789,44 +1890,59 @@ v_cmp_ne_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmp_ne_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX11: v_cmp_ne_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x4d,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
 
-v_cmp_neq_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_neq_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_neq_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_neq_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0d,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_neq_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0d,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_neq_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_neq_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0d,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0d,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0d,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0d,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_neq_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp ttmp15, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_neq_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x0a,0x0d,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x0a,0x0d,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x93,0x0d,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 v_cmp_neq_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_neq_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -1867,44 +1983,59 @@ v_cmp_neq_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmp_neq_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX11: v_cmp_neq_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x1d,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
-v_cmp_nge_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nge_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nge_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x09,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x09,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_nge_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x09,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nge_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x09,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nge_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x09,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nge_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x09,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x09,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x09,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x09,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_nge_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x09,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp ttmp15, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nge_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x0a,0x09,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x0a,0x09,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x09,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x93,0x09,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 v_cmp_nge_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_nge_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x19,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -1945,44 +2076,59 @@ v_cmp_nge_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmp_nge_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX11: v_cmp_nge_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x19,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
-v_cmp_ngt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ngt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ngt_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_ngt_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0b,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ngt_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0b,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ngt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ngt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0b,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0b,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0b,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_ngt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp ttmp15, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ngt_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x0a,0x0b,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x0a,0x0b,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0b,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x93,0x0b,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 v_cmp_ngt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_ngt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -2023,44 +2169,59 @@ v_cmp_ngt_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmp_ngt_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX11: v_cmp_ngt_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x1b,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
-v_cmp_nle_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nle_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nle_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_nle_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0c,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nle_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0c,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nle_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nle_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0c,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0c,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0c,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_nle_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp ttmp15, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nle_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x0a,0x0c,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x0a,0x0c,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0c,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x93,0x0c,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 v_cmp_nle_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_nle_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -2101,44 +2262,59 @@ v_cmp_nle_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmp_nle_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX11: v_cmp_nle_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x1c,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
-v_cmp_nlg_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlg_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlg_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_nlg_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0a,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nlg_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0a,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlg_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlg_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0a,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0a,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0a,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_nlg_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp ttmp15, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nlg_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x0a,0x0a,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x0a,0x0a,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0a,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x93,0x0a,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 v_cmp_nlg_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_nlg_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -2179,44 +2355,59 @@ v_cmp_nlg_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmp_nlg_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX11: v_cmp_nlg_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x1a,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
-v_cmp_nlt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlt_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_nlt_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0e,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nlt_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0e,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0e,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0e,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0e,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0e,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_nlt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp ttmp15, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nlt_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x0a,0x0e,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x0a,0x0e,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x93,0x0e,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 v_cmp_nlt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_nlt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -2257,44 +2448,59 @@ v_cmp_nlt_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmp_nlt_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX11: v_cmp_nlt_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x1e,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
-v_cmp_o_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_o_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_o_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_o_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_o_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_o_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_o_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x07,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_o_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x07,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_o_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x07,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_o_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_o_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x07,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_o_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_o_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x07,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_o_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x07,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_o_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x07,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x07,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x07,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_o_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x07,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp ttmp15, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_o_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x0a,0x07,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x0a,0x07,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x07,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x93,0x07,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 v_cmp_o_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_o_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -2335,44 +2541,48 @@ v_cmp_o_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmp_o_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX11: v_cmp_o_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x17,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
-v_cmp_t_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_t_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_t_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_t_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_t_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_t_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_t_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_t_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_t_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_t_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_t_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0f,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_t_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_t_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0f,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_t_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0f,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_t_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_t_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0f,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_t_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_t_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_t_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_t_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0f,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_t_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_t_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0f,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_t_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0f,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_t_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_t_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0f,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_t_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: v_cmp_t_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0f,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cmp_t_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_t_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0f,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_t_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_t_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0f,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
 v_cmp_t_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_t_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -2491,44 +2701,55 @@ v_cmp_t_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmp_t_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX11: v_cmp_t_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x4f,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
 
-v_cmp_tru_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_t_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_tru_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_t_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_t_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_tru_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_t_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_t_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_tru_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_t_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_t_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0f,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_tru_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_t_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0f,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_t_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0f,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_tru_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_t_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0f,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_tru_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_t_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_tru_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_t_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_t_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0f,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_tru_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_t_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0f,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_t_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0f,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_tru_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_t_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0f,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: v_cmp_t_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0f,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cmp_tru_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_t_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0f,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_t_f16_e64_dpp ttmp15, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_t_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x0a,0x0f,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_t_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_t_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x0a,0x0f,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_t_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_t_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x93,0x0f,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 v_cmp_tru_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_t_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -2569,44 +2790,59 @@ v_cmp_tru_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmp_tru_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX11: v_cmp_t_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x1f,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
-v_cmp_u_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_u_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_u_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_u_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_u_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_u_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_u_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x08,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_u_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x08,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_u_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x08,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_u_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_u_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x08,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_u_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_u_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_u_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x08,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_u_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x08,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x08,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_u_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x08,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x08,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_u_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x08,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp ttmp15, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_u_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x0a,0x08,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x0a,0x08,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x08,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x93,0x08,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 v_cmp_u_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_u_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x18,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopc.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopc.s
index 665a52fd4c278..d1e55d105c0b5 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopc.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopc.s
@@ -4,20 +4,20 @@
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
 
-v_cmp_class_f16_e64 s5, v1, v2
-// W32: v_cmp_class_f16_e64 s5, v1, v2          ; encoding: [0x05,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_class_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_class_f16_e64 s5, v1.l, v2.l      ; encoding: [0x05,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_class_f16_e64 s5, v255, v2
-// W32: v_cmp_class_f16_e64 s5, v255, v2        ; encoding: [0x05,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00]
+v_cmp_class_f16_e64 s5, v255.l, v2.l
+// W32: v_cmp_class_f16_e64 s5, v255.l, v2.l    ; encoding: [0x05,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_class_f16_e64 s5, s1, v2
-// W32: v_cmp_class_f16_e64 s5, s1, v2          ; encoding: [0x05,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00]
+v_cmp_class_f16_e64 s5, s1, v2.l
+// W32: v_cmp_class_f16_e64 s5, s1, v2.l        ; encoding: [0x05,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_class_f16_e64 s5, s105, v255
-// W32: v_cmp_class_f16_e64 s5, s105, v255      ; encoding: [0x05,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+v_cmp_class_f16_e64 s5, s105, v255.l
+// W32: v_cmp_class_f16_e64 s5, s105, v255.l    ; encoding: [0x05,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
 v_cmp_class_f16_e64 s5, vcc_lo, s2
@@ -60,24 +60,24 @@ v_cmp_class_f16_e64 ttmp15, src_scc, vcc_lo
 // W32: v_cmp_class_f16_e64 ttmp15, src_scc, vcc_lo ; encoding: [0x7b,0x00,0x7d,0xd4,0xfd,0xd4,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_class_f16_e64 s[10:11], v1, 0.5
-// W64: v_cmp_class_f16_e64 s[10:11], v1, 0.5   ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0xe1,0x01,0x00]
-// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+v_cmp_class_f16_e64 s10, v1.l, 0.5
+// W32: v_cmp_class_f16_e64 s10, v1.l, 0.5      ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0xe1,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_class_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_class_f16_e64 s[10:11], v1, v2    ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_class_f16_e64 s[10:11], v1.l, v2.l
+// W64: v_cmp_class_f16_e64 s[10:11], v1.l, v2.l ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_class_f16_e64 s[10:11], v255, v2
-// W64: v_cmp_class_f16_e64 s[10:11], v255, v2  ; encoding: [0x0a,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00]
+v_cmp_class_f16_e64 s[10:11], v255.l, v2.l
+// W64: v_cmp_class_f16_e64 s[10:11], v255.l, v2.l ; encoding: [0x0a,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_class_f16_e64 s[10:11], s1, v2
-// W64: v_cmp_class_f16_e64 s[10:11], s1, v2    ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00]
+v_cmp_class_f16_e64 s[10:11], s1, v2.l
+// W64: v_cmp_class_f16_e64 s[10:11], s1, v2.l  ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_class_f16_e64 s[10:11], s105, v255
-// W64: v_cmp_class_f16_e64 s[10:11], s105, v255 ; encoding: [0x0a,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+v_cmp_class_f16_e64 s[10:11], s105, v255.l
+// W64: v_cmp_class_f16_e64 s[10:11], s105, v255.l ; encoding: [0x0a,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
 v_cmp_class_f16_e64 s[10:11], vcc_lo, s2
@@ -123,6 +123,26 @@ v_cmp_class_f16_e64 ttmp[14:15], src_scc, vcc_lo
 v_cmp_class_f16_e64 null, -|0xfe0b|, vcc_hi
 // GFX11: v_cmp_class_f16_e64 null, -|0xfe0b|, vcc_hi ; encoding: [0x7c,0x01,0x7d,0xd4,0xff,0xd6,0x00,0x20,0x0b,0xfe,0x00,0x00]
 
+v_cmp_class_f16_e64 vcc_lo, 0.5, m0
+// W32: v_cmp_class_f16_e64 vcc_lo, 0.5, m0     ; encoding: [0x6a,0x00,0x7d,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s5, v255.h, v2.l
+// W32: v_cmp_class_f16_e64 s5, v255.h, v2.l    ; encoding: [0x05,0x08,0x7d,0xd4,0xff,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s5, s105, v255.h
+// W32: v_cmp_class_f16_e64 s5, s105, v255.h    ; encoding: [0x05,0x10,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s[10:11], v255.h, v2.l
+// W64: v_cmp_class_f16_e64 s[10:11], v255.h, v2.l ; encoding: [0x0a,0x08,0x7d,0xd4,0xff,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s[10:11], s105, v255.h
+// W64: v_cmp_class_f16_e64 s[10:11], s105, v255.h ; encoding: [0x0a,0x10,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
 v_cmp_class_f32_e64 s5, v1, v2
 // W32: v_cmp_class_f32_e64 s5, v1, v2          ; encoding: [0x05,0x00,0x7e,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
@@ -353,12 +373,12 @@ v_cmp_class_f64_e64 ttmp[14:15], -|src_scc|, src_scc
 v_cmp_class_f64_e64 null, 0xaf123456, 0xaf123456
 // GFX11: v_cmp_class_f64_e64 null, 0xaf123456, 0xaf123456 ; encoding: [0x7c,0x00,0x7f,0xd4,0xff,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
 
-v_cmp_eq_f16_e64 s5, v1, v2
-// W32: v_cmp_eq_f16_e64 s5, v1, v2             ; encoding: [0x05,0x00,0x02,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_eq_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_eq_f16_e64 s5, v1.l, v2.l         ; encoding: [0x05,0x00,0x02,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64 s5, v255, v255
-// W32: v_cmp_eq_f16_e64 s5, v255, v255         ; encoding: [0x05,0x00,0x02,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_eq_f16_e64 s5, v255.l, v255.l
+// W32: v_cmp_eq_f16_e64 s5, v255.l, v255.l     ; encoding: [0x05,0x00,0x02,0xd4,0xff,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
 v_cmp_eq_f16_e64 s5, s1, s2
@@ -409,12 +429,12 @@ v_cmp_eq_f16_e64 ttmp15, -src_scc, |vcc_lo|
 // W32: v_cmp_eq_f16_e64 ttmp15, -src_scc, |vcc_lo| ; encoding: [0x7b,0x02,0x02,0xd4,0xfd,0xd4,0x00,0x20]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_eq_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_eq_f16_e64 s[10:11], v1.l, v2.l
+// W64: v_cmp_eq_f16_e64 s[10:11], v1.l, v2.l   ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64 s[10:11], v255, v255
-// W64: v_cmp_eq_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x02,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_eq_f16_e64 s[10:11], v255.l, v255.l
+// W64: v_cmp_eq_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x02,0xd4,0xff,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
 v_cmp_eq_f16_e64 s[10:11], s1, s2
@@ -468,6 +488,26 @@ v_cmp_eq_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
 v_cmp_eq_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmp_eq_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x02,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+v_cmp_eq_f16_e64 vcc_lo, 0.5, -m0
+// W32: v_cmp_eq_f16_e64 vcc_lo, 0.5, -m0       ; encoding: [0x6a,0x00,0x02,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s5, v1.h, v2.l
+// W32: v_cmp_eq_f16_e64 s5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x02,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s5, v255.l, v255.h
+// W32: v_cmp_eq_f16_e64 s5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x02,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s[10:11], v1.h, v2.l
+// W64: v_cmp_eq_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x02,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s[10:11], v255.l, v255.h
+// W64: v_cmp_eq_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x02,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
 v_cmp_eq_f32_e64 s5, v1, v2
 // W32: v_cmp_eq_f32_e64 s5, v1, v2             ; encoding: [0x05,0x00,0x12,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
@@ -1364,12 +1404,12 @@ v_cmp_eq_u64_e64 ttmp[14:15], src_scc, exec
 v_cmp_eq_u64_e64 null, 0xaf123456, vcc
 // GFX11: v_cmp_eq_u64_e64 null, 0xaf123456, vcc  ; encoding: [0x7c,0x00,0x5a,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
 
-v_cmp_f_f16_e64 s5, v1, v2
-// W32: v_cmp_f_f16_e64 s5, v1, v2              ; encoding: [0x05,0x00,0x00,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_f_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_f_f16_e64 s5, v1.l, v2.l          ; encoding: [0x05,0x00,0x00,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_f_f16_e64 s5, v255, v255
-// W32: v_cmp_f_f16_e64 s5, v255, v255          ; encoding: [0x05,0x00,0x00,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_f_f16_e64 s5, v255.l, v255.l
+// W32: v_cmp_f_f16_e64 s5, v255.l, v255.l      ; encoding: [0x05,0x00,0x00,0xd4,0xff,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_f_f16_e64 s5, s1, s2
@@ -1420,12 +1460,12 @@ v_cmp_f_f16_e64 ttmp15, -src_scc, |vcc_lo|
 // W32: v_cmp_f_f16_e64 ttmp15, -src_scc, |vcc_lo| ; encoding: [0x7b,0x02,0x00,0xd4,0xfd,0xd4,0x00,0x20]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_f_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_f_f16_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_f_f16_e64 s[10:11], v1.l, v2.l
+// W64: v_cmp_f_f16_e64 s[10:11], v1.l, v2.l    ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_f_f16_e64 s[10:11], v255, v255
-// W64: v_cmp_f_f16_e64 s[10:11], v255, v255    ; encoding: [0x0a,0x00,0x00,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_f_f16_e64 s[10:11], v255.l, v255.l
+// W64: v_cmp_f_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x00,0xd4,0xff,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_f_f16_e64 s[10:11], s1, s2
@@ -1479,6 +1519,26 @@ v_cmp_f_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
 v_cmp_f_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmp_f_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x00,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+v_cmp_f_f16_e64 vcc_lo, 0.5, -m0
+// W32: v_cmp_f_f16_e64 vcc_lo, 0.5, -m0        ; encoding: [0x6a,0x00,0x00,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 s5, v1.h, v2.l
+// W32: v_cmp_f_f16_e64 s5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x00,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 s5, v255.l, v255.h
+// W32: v_cmp_f_f16_e64 s5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x00,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 s[10:11], v1.h, v2.l
+// W64: v_cmp_f_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x00,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 s[10:11], v255.l, v255.h
+// W64: v_cmp_f_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x00,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
 v_cmp_f_f32_e64 s5, v1, v2
 // W32: v_cmp_f_f32_e64 s5, v1, v2              ; encoding: [0x05,0x00,0x10,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
@@ -2097,12 +2157,12 @@ v_cmp_f_u64_e64 ttmp[14:15], src_scc, exec
 v_cmp_f_u64_e64 null, 0xaf123456, vcc
 // GFX11: v_cmp_f_u64_e64 null, 0xaf123456, vcc   ; encoding: [0x7c,0x00,0x58,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
 
-v_cmp_ge_f16_e64 s5, v1, v2
-// W32: v_cmp_ge_f16_e64 s5, v1, v2             ; encoding: [0x05,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_ge_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_ge_f16_e64 s5, v1.l, v2.l         ; encoding: [0x05,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64 s5, v255, v255
-// W32: v_cmp_ge_f16_e64 s5, v255, v255         ; encoding: [0x05,0x00,0x06,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_ge_f16_e64 s5, v255.l, v255.l
+// W32: v_cmp_ge_f16_e64 s5, v255.l, v255.l     ; encoding: [0x05,0x00,0x06,0xd4,0xff,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
 v_cmp_ge_f16_e64 s5, s1, s2
@@ -2153,12 +2213,12 @@ v_cmp_ge_f16_e64 ttmp15, -src_scc, |vcc_lo|
 // W32: v_cmp_ge_f16_e64 ttmp15, -src_scc, |vcc_lo| ; encoding: [0x7b,0x02,0x06,0xd4,0xfd,0xd4,0x00,0x20]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_ge_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_ge_f16_e64 s[10:11], v1.l, v2.l
+// W64: v_cmp_ge_f16_e64 s[10:11], v1.l, v2.l   ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64 s[10:11], v255, v255
-// W64: v_cmp_ge_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x06,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_ge_f16_e64 s[10:11], v255.l, v255.l
+// W64: v_cmp_ge_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x06,0xd4,0xff,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
 v_cmp_ge_f16_e64 s[10:11], s1, s2
@@ -2212,6 +2272,26 @@ v_cmp_ge_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
 v_cmp_ge_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmp_ge_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x06,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+v_cmp_ge_f16_e64 vcc_lo, 0.5, -m0
+// W32: v_cmp_ge_f16_e64 vcc_lo, 0.5, -m0       ; encoding: [0x6a,0x00,0x06,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s5, v1.h, v2.l
+// W32: v_cmp_ge_f16_e64 s5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x06,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s5, v255.l, v255.h
+// W32: v_cmp_ge_f16_e64 s5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x06,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s[10:11], v1.h, v2.l
+// W64: v_cmp_ge_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x06,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s[10:11], v255.l, v255.h
+// W64: v_cmp_ge_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x06,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
 v_cmp_ge_f32_e64 s5, v1, v2
 // W32: v_cmp_ge_f32_e64 s5, v1, v2             ; encoding: [0x05,0x00,0x16,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
@@ -3108,12 +3188,12 @@ v_cmp_ge_u64_e64 ttmp[14:15], src_scc, exec
 v_cmp_ge_u64_e64 null, 0xaf123456, vcc
 // GFX11: v_cmp_ge_u64_e64 null, 0xaf123456, vcc  ; encoding: [0x7c,0x00,0x5e,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
 
-v_cmp_gt_f16_e64 s5, v1, v2
-// W32: v_cmp_gt_f16_e64 s5, v1, v2             ; encoding: [0x05,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_gt_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_gt_f16_e64 s5, v1.l, v2.l         ; encoding: [0x05,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64 s5, v255, v255
-// W32: v_cmp_gt_f16_e64 s5, v255, v255         ; encoding: [0x05,0x00,0x04,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_gt_f16_e64 s5, v255.l, v255.l
+// W32: v_cmp_gt_f16_e64 s5, v255.l, v255.l     ; encoding: [0x05,0x00,0x04,0xd4,0xff,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
 v_cmp_gt_f16_e64 s5, s1, s2
@@ -3164,12 +3244,12 @@ v_cmp_gt_f16_e64 ttmp15, -src_scc, |vcc_lo|
 // W32: v_cmp_gt_f16_e64 ttmp15, -src_scc, |vcc_lo| ; encoding: [0x7b,0x02,0x04,0xd4,0xfd,0xd4,0x00,0x20]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_gt_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_gt_f16_e64 s[10:11], v1.l, v2.l
+// W64: v_cmp_gt_f16_e64 s[10:11], v1.l, v2.l   ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64 s[10:11], v255, v255
-// W64: v_cmp_gt_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x04,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_gt_f16_e64 s[10:11], v255.l, v255.l
+// W64: v_cmp_gt_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x04,0xd4,0xff,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
 v_cmp_gt_f16_e64 s[10:11], s1, s2
@@ -3223,6 +3303,26 @@ v_cmp_gt_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
 v_cmp_gt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmp_gt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x04,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+v_cmp_gt_f16_e64 vcc_lo, 0.5, -m0
+// W32: v_cmp_gt_f16_e64 vcc_lo, 0.5, -m0       ; encoding: [0x6a,0x00,0x04,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s5, v1.h, v2.l
+// W32: v_cmp_gt_f16_e64 s5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s5, v255.l, v255.h
+// W32: v_cmp_gt_f16_e64 s5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s[10:11], v1.h, v2.l
+// W64: v_cmp_gt_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x04,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s[10:11], v255.l, v255.h
+// W64: v_cmp_gt_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x04,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
 v_cmp_gt_f32_e64 s5, v1, v2
 // W32: v_cmp_gt_f32_e64 s5, v1, v2             ; encoding: [0x05,0x00,0x14,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
@@ -4119,12 +4219,12 @@ v_cmp_gt_u64_e64 ttmp[14:15], src_scc, exec
 v_cmp_gt_u64_e64 null, 0xaf123456, vcc
 // GFX11: v_cmp_gt_u64_e64 null, 0xaf123456, vcc  ; encoding: [0x7c,0x00,0x5c,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
 
-v_cmp_le_f16_e64 s5, v1, v2
-// W32: v_cmp_le_f16_e64 s5, v1, v2             ; encoding: [0x05,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_le_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_le_f16_e64 s5, v1.l, v2.l         ; encoding: [0x05,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_le_f16_e64 s5, v255, v255
-// W32: v_cmp_le_f16_e64 s5, v255, v255         ; encoding: [0x05,0x00,0x03,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_le_f16_e64 s5, v255.l, v255.l
+// W32: v_cmp_le_f16_e64 s5, v255.l, v255.l     ; encoding: [0x05,0x00,0x03,0xd4,0xff,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
 v_cmp_le_f16_e64 s5, s1, s2
@@ -4175,12 +4275,12 @@ v_cmp_le_f16_e64 ttmp15, -src_scc, |vcc_lo|
 // W32: v_cmp_le_f16_e64 ttmp15, -src_scc, |vcc_lo| ; encoding: [0x7b,0x02,0x03,0xd4,0xfd,0xd4,0x00,0x20]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_le_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_le_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_le_f16_e64 s[10:11], v1.l, v2.l
+// W64: v_cmp_le_f16_e64 s[10:11], v1.l, v2.l   ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_le_f16_e64 s[10:11], v255, v255
-// W64: v_cmp_le_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x03,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_le_f16_e64 s[10:11], v255.l, v255.l
+// W64: v_cmp_le_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x03,0xd4,0xff,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
 v_cmp_le_f16_e64 s[10:11], s1, s2
@@ -4234,6 +4334,26 @@ v_cmp_le_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
 v_cmp_le_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmp_le_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x03,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+v_cmp_le_f16_e64 vcc_lo, 0.5, -m0
+// W32: v_cmp_le_f16_e64 vcc_lo, 0.5, -m0       ; encoding: [0x6a,0x00,0x03,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s5, v1.h, v2.l
+// W32: v_cmp_le_f16_e64 s5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s5, v255.l, v255.h
+// W32: v_cmp_le_f16_e64 s5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s[10:11], v1.h, v2.l
+// W64: v_cmp_le_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x03,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s[10:11], v255.l, v255.h
+// W64: v_cmp_le_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x03,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
 v_cmp_le_f32_e64 s5, v1, v2
 // W32: v_cmp_le_f32_e64 s5, v1, v2             ; encoding: [0x05,0x00,0x13,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
@@ -5130,12 +5250,12 @@ v_cmp_le_u64_e64 ttmp[14:15], src_scc, exec
 v_cmp_le_u64_e64 null, 0xaf123456, vcc
 // GFX11: v_cmp_le_u64_e64 null, 0xaf123456, vcc  ; encoding: [0x7c,0x00,0x5b,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
 
-v_cmp_lg_f16_e64 s5, v1, v2
-// W32: v_cmp_lg_f16_e64 s5, v1, v2             ; encoding: [0x05,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_lg_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_lg_f16_e64 s5, v1.l, v2.l         ; encoding: [0x05,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64 s5, v255, v255
-// W32: v_cmp_lg_f16_e64 s5, v255, v255         ; encoding: [0x05,0x00,0x05,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_lg_f16_e64 s5, v255.l, v255.l
+// W32: v_cmp_lg_f16_e64 s5, v255.l, v255.l     ; encoding: [0x05,0x00,0x05,0xd4,0xff,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
 v_cmp_lg_f16_e64 s5, s1, s2
@@ -5186,12 +5306,12 @@ v_cmp_lg_f16_e64 ttmp15, -src_scc, |vcc_lo|
 // W32: v_cmp_lg_f16_e64 ttmp15, -src_scc, |vcc_lo| ; encoding: [0x7b,0x02,0x05,0xd4,0xfd,0xd4,0x00,0x20]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_lg_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_lg_f16_e64 s[10:11], v1.l, v2.l
+// W64: v_cmp_lg_f16_e64 s[10:11], v1.l, v2.l   ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64 s[10:11], v255, v255
-// W64: v_cmp_lg_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x05,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_lg_f16_e64 s[10:11], v255.l, v255.l
+// W64: v_cmp_lg_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x05,0xd4,0xff,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
 v_cmp_lg_f16_e64 s[10:11], s1, s2
@@ -5245,6 +5365,26 @@ v_cmp_lg_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
 v_cmp_lg_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmp_lg_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x05,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+v_cmp_lg_f16_e64 vcc_lo, 0.5, -m0
+// W32: v_cmp_lg_f16_e64 vcc_lo, 0.5, -m0       ; encoding: [0x6a,0x00,0x05,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s5, v1.h, v2.l
+// W32: v_cmp_lg_f16_e64 s5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x05,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s5, v255.l, v255.h
+// W32: v_cmp_lg_f16_e64 s5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x05,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s[10:11], v1.h, v2.l
+// W64: v_cmp_lg_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x05,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s[10:11], v255.l, v255.h
+// W64: v_cmp_lg_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x05,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
 v_cmp_lg_f32_e64 s5, v1, v2
 // W32: v_cmp_lg_f32_e64 s5, v1, v2             ; encoding: [0x05,0x00,0x15,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
@@ -7172,12 +7312,12 @@ v_cmp_ne_u64_e64 ttmp[14:15], src_scc, exec
 v_cmp_ne_u64_e64 null, 0xaf123456, vcc
 // GFX11: v_cmp_ne_u64_e64 null, 0xaf123456, vcc  ; encoding: [0x7c,0x00,0x5d,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
 
-v_cmp_neq_f16_e64 s5, v1, v2
-// W32: v_cmp_neq_f16_e64 s5, v1, v2            ; encoding: [0x05,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_neq_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_neq_f16_e64 s5, v1.l, v2.l        ; encoding: [0x05,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64 s5, v255, v255
-// W32: v_cmp_neq_f16_e64 s5, v255, v255        ; encoding: [0x05,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_neq_f16_e64 s5, v255.l, v255.l
+// W32: v_cmp_neq_f16_e64 s5, v255.l, v255.l    ; encoding: [0x05,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
 v_cmp_neq_f16_e64 s5, s1, s2
@@ -7228,12 +7368,12 @@ v_cmp_neq_f16_e64 ttmp15, -src_scc, |vcc_lo|
 // W32: v_cmp_neq_f16_e64 ttmp15, -src_scc, |vcc_lo| ; encoding: [0x7b,0x02,0x0d,0xd4,0xfd,0xd4,0x00,0x20]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_neq_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_neq_f16_e64 s[10:11], v1.l, v2.l
+// W64: v_cmp_neq_f16_e64 s[10:11], v1.l, v2.l  ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64 s[10:11], v255, v255
-// W64: v_cmp_neq_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_neq_f16_e64 s[10:11], v255.l, v255.l
+// W64: v_cmp_neq_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
 v_cmp_neq_f16_e64 s[10:11], s1, s2
@@ -7287,6 +7427,26 @@ v_cmp_neq_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
 v_cmp_neq_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmp_neq_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x0d,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+v_cmp_neq_f16_e64 vcc_lo, 0.5, -m0
+// W32: v_cmp_neq_f16_e64 vcc_lo, 0.5, -m0      ; encoding: [0x6a,0x00,0x0d,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s5, v1.h, v2.l
+// W32: v_cmp_neq_f16_e64 s5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s5, v255.l, v255.h
+// W32: v_cmp_neq_f16_e64 s5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s[10:11], v1.h, v2.l
+// W64: v_cmp_neq_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0d,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s[10:11], v255.l, v255.h
+// W64: v_cmp_neq_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0d,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
 v_cmp_neq_f32_e64 s5, v1, v2
 // W32: v_cmp_neq_f32_e64 s5, v1, v2            ; encoding: [0x05,0x00,0x1d,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
@@ -7493,12 +7653,12 @@ v_cmp_neq_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
 v_cmp_neq_f64_e64 null, 0xaf123456, -|vcc| clamp
 // GFX11: v_cmp_neq_f64_e64 null, 0xaf123456, -|vcc| clamp ; encoding: [0x7c,0x82,0x2d,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
 
-v_cmp_nge_f16_e64 s5, v1, v2
-// W32: v_cmp_nge_f16_e64 s5, v1, v2            ; encoding: [0x05,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_nge_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_nge_f16_e64 s5, v1.l, v2.l        ; encoding: [0x05,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64 s5, v255, v255
-// W32: v_cmp_nge_f16_e64 s5, v255, v255        ; encoding: [0x05,0x00,0x09,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_nge_f16_e64 s5, v255.l, v255.l
+// W32: v_cmp_nge_f16_e64 s5, v255.l, v255.l    ; encoding: [0x05,0x00,0x09,0xd4,0xff,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
 v_cmp_nge_f16_e64 s5, s1, s2
@@ -7549,12 +7709,12 @@ v_cmp_nge_f16_e64 ttmp15, -src_scc, |vcc_lo|
 // W32: v_cmp_nge_f16_e64 ttmp15, -src_scc, |vcc_lo| ; encoding: [0x7b,0x02,0x09,0xd4,0xfd,0xd4,0x00,0x20]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_nge_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_nge_f16_e64 s[10:11], v1.l, v2.l
+// W64: v_cmp_nge_f16_e64 s[10:11], v1.l, v2.l  ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64 s[10:11], v255, v255
-// W64: v_cmp_nge_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x09,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_nge_f16_e64 s[10:11], v255.l, v255.l
+// W64: v_cmp_nge_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x09,0xd4,0xff,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
 v_cmp_nge_f16_e64 s[10:11], s1, s2
@@ -7608,6 +7768,26 @@ v_cmp_nge_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
 v_cmp_nge_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmp_nge_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x09,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+v_cmp_nge_f16_e64 vcc_lo, 0.5, -m0
+// W32: v_cmp_nge_f16_e64 vcc_lo, 0.5, -m0      ; encoding: [0x6a,0x00,0x09,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s5, v1.h, v2.l
+// W32: v_cmp_nge_f16_e64 s5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x09,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s5, v255.l, v255.h
+// W32: v_cmp_nge_f16_e64 s5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x09,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s[10:11], v1.h, v2.l
+// W64: v_cmp_nge_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x09,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s[10:11], v255.l, v255.h
+// W64: v_cmp_nge_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x09,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
 v_cmp_nge_f32_e64 s5, v1, v2
 // W32: v_cmp_nge_f32_e64 s5, v1, v2            ; encoding: [0x05,0x00,0x19,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
@@ -7814,12 +7994,12 @@ v_cmp_nge_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
 v_cmp_nge_f64_e64 null, 0xaf123456, -|vcc| clamp
 // GFX11: v_cmp_nge_f64_e64 null, 0xaf123456, -|vcc| clamp ; encoding: [0x7c,0x82,0x29,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
 
-v_cmp_ngt_f16_e64 s5, v1, v2
-// W32: v_cmp_ngt_f16_e64 s5, v1, v2            ; encoding: [0x05,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_ngt_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_ngt_f16_e64 s5, v1.l, v2.l        ; encoding: [0x05,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64 s5, v255, v255
-// W32: v_cmp_ngt_f16_e64 s5, v255, v255        ; encoding: [0x05,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_ngt_f16_e64 s5, v255.l, v255.l
+// W32: v_cmp_ngt_f16_e64 s5, v255.l, v255.l    ; encoding: [0x05,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
 v_cmp_ngt_f16_e64 s5, s1, s2
@@ -7870,12 +8050,12 @@ v_cmp_ngt_f16_e64 ttmp15, -src_scc, |vcc_lo|
 // W32: v_cmp_ngt_f16_e64 ttmp15, -src_scc, |vcc_lo| ; encoding: [0x7b,0x02,0x0b,0xd4,0xfd,0xd4,0x00,0x20]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_ngt_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_ngt_f16_e64 s[10:11], v1.l, v2.l
+// W64: v_cmp_ngt_f16_e64 s[10:11], v1.l, v2.l  ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64 s[10:11], v255, v255
-// W64: v_cmp_ngt_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_ngt_f16_e64 s[10:11], v255.l, v255.l
+// W64: v_cmp_ngt_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
 v_cmp_ngt_f16_e64 s[10:11], s1, s2
@@ -7929,6 +8109,26 @@ v_cmp_ngt_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
 v_cmp_ngt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmp_ngt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x0b,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+v_cmp_ngt_f16_e64 vcc_lo, 0.5, -m0
+// W32: v_cmp_ngt_f16_e64 vcc_lo, 0.5, -m0      ; encoding: [0x6a,0x00,0x0b,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s5, v1.h, v2.l
+// W32: v_cmp_ngt_f16_e64 s5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0b,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s5, v255.l, v255.h
+// W32: v_cmp_ngt_f16_e64 s5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0b,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s[10:11], v1.h, v2.l
+// W64: v_cmp_ngt_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0b,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s[10:11], v255.l, v255.h
+// W64: v_cmp_ngt_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0b,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
 v_cmp_ngt_f32_e64 s5, v1, v2
 // W32: v_cmp_ngt_f32_e64 s5, v1, v2            ; encoding: [0x05,0x00,0x1b,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
@@ -8135,12 +8335,12 @@ v_cmp_ngt_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
 v_cmp_ngt_f64_e64 null, 0xaf123456, -|vcc| clamp
 // GFX11: v_cmp_ngt_f64_e64 null, 0xaf123456, -|vcc| clamp ; encoding: [0x7c,0x82,0x2b,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
 
-v_cmp_nle_f16_e64 s5, v1, v2
-// W32: v_cmp_nle_f16_e64 s5, v1, v2            ; encoding: [0x05,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_nle_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_nle_f16_e64 s5, v1.l, v2.l        ; encoding: [0x05,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64 s5, v255, v255
-// W32: v_cmp_nle_f16_e64 s5, v255, v255        ; encoding: [0x05,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_nle_f16_e64 s5, v255.l, v255.l
+// W32: v_cmp_nle_f16_e64 s5, v255.l, v255.l    ; encoding: [0x05,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
 v_cmp_nle_f16_e64 s5, s1, s2
@@ -8191,12 +8391,12 @@ v_cmp_nle_f16_e64 ttmp15, -src_scc, |vcc_lo|
 // W32: v_cmp_nle_f16_e64 ttmp15, -src_scc, |vcc_lo| ; encoding: [0x7b,0x02,0x0c,0xd4,0xfd,0xd4,0x00,0x20]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_nle_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_nle_f16_e64 s[10:11], v1.l, v2.l
+// W64: v_cmp_nle_f16_e64 s[10:11], v1.l, v2.l  ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64 s[10:11], v255, v255
-// W64: v_cmp_nle_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_nle_f16_e64 s[10:11], v255.l, v255.l
+// W64: v_cmp_nle_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
 v_cmp_nle_f16_e64 s[10:11], s1, s2
@@ -8250,6 +8450,26 @@ v_cmp_nle_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
 v_cmp_nle_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmp_nle_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x0c,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+v_cmp_nle_f16_e64 vcc_lo, 0.5, -m0
+// W32: v_cmp_nle_f16_e64 vcc_lo, 0.5, -m0      ; encoding: [0x6a,0x00,0x0c,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s5, v1.h, v2.l
+// W32: v_cmp_nle_f16_e64 s5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0c,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s5, v255.l, v255.h
+// W32: v_cmp_nle_f16_e64 s5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0c,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s[10:11], v1.h, v2.l
+// W64: v_cmp_nle_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0c,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s[10:11], v255.l, v255.h
+// W64: v_cmp_nle_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0c,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
 v_cmp_nle_f32_e64 s5, v1, v2
 // W32: v_cmp_nle_f32_e64 s5, v1, v2            ; encoding: [0x05,0x00,0x1c,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
@@ -8456,12 +8676,12 @@ v_cmp_nle_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
 v_cmp_nle_f64_e64 null, 0xaf123456, -|vcc| clamp
 // GFX11: v_cmp_nle_f64_e64 null, 0xaf123456, -|vcc| clamp ; encoding: [0x7c,0x82,0x2c,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
 
-v_cmp_nlg_f16_e64 s5, v1, v2
-// W32: v_cmp_nlg_f16_e64 s5, v1, v2            ; encoding: [0x05,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_nlg_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_nlg_f16_e64 s5, v1.l, v2.l        ; encoding: [0x05,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64 s5, v255, v255
-// W32: v_cmp_nlg_f16_e64 s5, v255, v255        ; encoding: [0x05,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_nlg_f16_e64 s5, v255.l, v255.l
+// W32: v_cmp_nlg_f16_e64 s5, v255.l, v255.l    ; encoding: [0x05,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
 v_cmp_nlg_f16_e64 s5, s1, s2
@@ -8512,12 +8732,12 @@ v_cmp_nlg_f16_e64 ttmp15, -src_scc, |vcc_lo|
 // W32: v_cmp_nlg_f16_e64 ttmp15, -src_scc, |vcc_lo| ; encoding: [0x7b,0x02,0x0a,0xd4,0xfd,0xd4,0x00,0x20]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_nlg_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_nlg_f16_e64 s[10:11], v1.l, v2.l
+// W64: v_cmp_nlg_f16_e64 s[10:11], v1.l, v2.l  ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64 s[10:11], v255, v255
-// W64: v_cmp_nlg_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_nlg_f16_e64 s[10:11], v255.l, v255.l
+// W64: v_cmp_nlg_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
 v_cmp_nlg_f16_e64 s[10:11], s1, s2
@@ -8571,6 +8791,26 @@ v_cmp_nlg_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
 v_cmp_nlg_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmp_nlg_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x0a,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+v_cmp_nlg_f16_e64 vcc_lo, 0.5, -m0
+// W32: v_cmp_nlg_f16_e64 vcc_lo, 0.5, -m0      ; encoding: [0x6a,0x00,0x0a,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s5, v1.h, v2.l
+// W32: v_cmp_nlg_f16_e64 s5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0a,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s5, v255.l, v255.h
+// W32: v_cmp_nlg_f16_e64 s5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0a,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s[10:11], v1.h, v2.l
+// W64: v_cmp_nlg_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0a,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s[10:11], v255.l, v255.h
+// W64: v_cmp_nlg_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0a,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
 v_cmp_nlg_f32_e64 s5, v1, v2
 // W32: v_cmp_nlg_f32_e64 s5, v1, v2            ; encoding: [0x05,0x00,0x1a,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
@@ -8777,12 +9017,12 @@ v_cmp_nlg_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
 v_cmp_nlg_f64_e64 null, 0xaf123456, -|vcc| clamp
 // GFX11: v_cmp_nlg_f64_e64 null, 0xaf123456, -|vcc| clamp ; encoding: [0x7c,0x82,0x2a,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
 
-v_cmp_nlt_f16_e64 s5, v1, v2
-// W32: v_cmp_nlt_f16_e64 s5, v1, v2            ; encoding: [0x05,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_nlt_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_nlt_f16_e64 s5, v1.l, v2.l        ; encoding: [0x05,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64 s5, v255, v255
-// W32: v_cmp_nlt_f16_e64 s5, v255, v255        ; encoding: [0x05,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_nlt_f16_e64 s5, v255.l, v255.l
+// W32: v_cmp_nlt_f16_e64 s5, v255.l, v255.l    ; encoding: [0x05,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
 v_cmp_nlt_f16_e64 s5, s1, s2
@@ -8833,12 +9073,12 @@ v_cmp_nlt_f16_e64 ttmp15, -src_scc, |vcc_lo|
 // W32: v_cmp_nlt_f16_e64 ttmp15, -src_scc, |vcc_lo| ; encoding: [0x7b,0x02,0x0e,0xd4,0xfd,0xd4,0x00,0x20]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_nlt_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_nlt_f16_e64 s[10:11], v1.l, v2.l
+// W64: v_cmp_nlt_f16_e64 s[10:11], v1.l, v2.l  ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64 s[10:11], v255, v255
-// W64: v_cmp_nlt_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_nlt_f16_e64 s[10:11], v255.l, v255.l
+// W64: v_cmp_nlt_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
 v_cmp_nlt_f16_e64 s[10:11], s1, s2
@@ -8892,6 +9132,26 @@ v_cmp_nlt_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
 v_cmp_nlt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmp_nlt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x0e,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+v_cmp_nlt_f16_e64 vcc_lo, 0.5, -m0
+// W32: v_cmp_nlt_f16_e64 vcc_lo, 0.5, -m0      ; encoding: [0x6a,0x00,0x0e,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s5, v1.h, v2.l
+// W32: v_cmp_nlt_f16_e64 s5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s5, v255.l, v255.h
+// W32: v_cmp_nlt_f16_e64 s5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s[10:11], v1.h, v2.l
+// W64: v_cmp_nlt_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0e,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s[10:11], v255.l, v255.h
+// W64: v_cmp_nlt_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0e,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
 v_cmp_nlt_f32_e64 s5, v1, v2
 // W32: v_cmp_nlt_f32_e64 s5, v1, v2            ; encoding: [0x05,0x00,0x1e,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
@@ -9098,12 +9358,12 @@ v_cmp_nlt_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
 v_cmp_nlt_f64_e64 null, 0xaf123456, -|vcc| clamp
 // GFX11: v_cmp_nlt_f64_e64 null, 0xaf123456, -|vcc| clamp ; encoding: [0x7c,0x82,0x2e,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
 
-v_cmp_o_f16_e64 s5, v1, v2
-// W32: v_cmp_o_f16_e64 s5, v1, v2              ; encoding: [0x05,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_o_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_o_f16_e64 s5, v1.l, v2.l          ; encoding: [0x05,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_o_f16_e64 s5, v255, v255
-// W32: v_cmp_o_f16_e64 s5, v255, v255          ; encoding: [0x05,0x00,0x07,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_o_f16_e64 s5, v255.l, v255.l
+// W32: v_cmp_o_f16_e64 s5, v255.l, v255.l      ; encoding: [0x05,0x00,0x07,0xd4,0xff,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_o_f16_e64 s5, s1, s2
@@ -9154,12 +9414,12 @@ v_cmp_o_f16_e64 ttmp15, -src_scc, |vcc_lo|
 // W32: v_cmp_o_f16_e64 ttmp15, -src_scc, |vcc_lo| ; encoding: [0x7b,0x02,0x07,0xd4,0xfd,0xd4,0x00,0x20]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_o_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_o_f16_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_o_f16_e64 s[10:11], v1.l, v2.l
+// W64: v_cmp_o_f16_e64 s[10:11], v1.l, v2.l    ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_o_f16_e64 s[10:11], v255, v255
-// W64: v_cmp_o_f16_e64 s[10:11], v255, v255    ; encoding: [0x0a,0x00,0x07,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_o_f16_e64 s[10:11], v255.l, v255.l
+// W64: v_cmp_o_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x07,0xd4,0xff,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_o_f16_e64 s[10:11], s1, s2
@@ -9213,6 +9473,26 @@ v_cmp_o_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
 v_cmp_o_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmp_o_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x07,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+v_cmp_o_f16_e64 vcc_lo, 0.5, -m0
+// W32: v_cmp_o_f16_e64 vcc_lo, 0.5, -m0        ; encoding: [0x6a,0x00,0x07,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s5, v1.h, v2.l
+// W32: v_cmp_o_f16_e64 s5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x07,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s5, v255.l, v255.h
+// W32: v_cmp_o_f16_e64 s5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x07,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s[10:11], v1.h, v2.l
+// W64: v_cmp_o_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x07,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s[10:11], v255.l, v255.h
+// W64: v_cmp_o_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x07,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
 v_cmp_o_f32_e64 s5, v1, v2
 // W32: v_cmp_o_f32_e64 s5, v1, v2              ; encoding: [0x05,0x00,0x17,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
@@ -9419,12 +9699,12 @@ v_cmp_o_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
 v_cmp_o_f64_e64 null, 0xaf123456, -|vcc| clamp
 // GFX11: v_cmp_o_f64_e64 null, 0xaf123456, -|vcc| clamp ; encoding: [0x7c,0x82,0x27,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
 
-v_cmp_t_f16_e64 s5, v1, v2
-// W32: v_cmp_t_f16_e64 s5, v1, v2              ; encoding: [0x05,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_t_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_t_f16_e64 s5, v1.l, v2.l          ; encoding: [0x05,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_t_f16_e64 s5, v255, v255
-// W32: v_cmp_t_f16_e64 s5, v255, v255          ; encoding: [0x05,0x00,0x0f,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_t_f16_e64 s5, v255.l, v255.l
+// W32: v_cmp_t_f16_e64 s5, v255.l, v255.l      ; encoding: [0x05,0x00,0x0f,0xd4,0xff,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_t_f16_e64 s5, s1, s2
@@ -9475,12 +9755,12 @@ v_cmp_t_f16_e64 ttmp15, -src_scc, |vcc_lo|
 // W32: v_cmp_t_f16_e64 ttmp15, -src_scc, |vcc_lo| ; encoding: [0x7b,0x02,0x0f,0xd4,0xfd,0xd4,0x00,0x20]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_t_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_t_f16_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_t_f16_e64 s[10:11], v1.l, v2.l
+// W64: v_cmp_t_f16_e64 s[10:11], v1.l, v2.l    ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_t_f16_e64 s[10:11], v255, v255
-// W64: v_cmp_t_f16_e64 s[10:11], v255, v255    ; encoding: [0x0a,0x00,0x0f,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_t_f16_e64 s[10:11], v255.l, v255.l
+// W64: v_cmp_t_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x0f,0xd4,0xff,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_t_f16_e64 s[10:11], s1, s2
@@ -9534,6 +9814,10 @@ v_cmp_t_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
 v_cmp_t_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmp_t_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x0f,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+v_cmp_t_f16_e64 vcc_lo, 0.5, -m0
+// W32: v_cmp_t_f16_e64 vcc_lo, 0.5, -m0        ; encoding: [0x6a,0x00,0x0f,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
 v_cmp_t_f32_e64 s5, v1, v2
 // W32: v_cmp_t_f32_e64 s5, v1, v2              ; encoding: [0x05,0x00,0x1f,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
@@ -10152,12 +10436,12 @@ v_cmp_t_u64_e64 ttmp[14:15], src_scc, exec
 v_cmp_t_u64_e64 null, 0xaf123456, vcc
 // GFX11: v_cmp_t_u64_e64 null, 0xaf123456, vcc   ; encoding: [0x7c,0x00,0x5f,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
 
-v_cmp_tru_f16_e64 s5, v1, v2
-// W32: v_cmp_t_f16_e64 s5, v1, v2              ; encoding: [0x05,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_tru_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_t_f16_e64 s5, v1.l, v2.l          ; encoding: [0x05,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64 s5, v255, v255
-// W32: v_cmp_t_f16_e64 s5, v255, v255          ; encoding: [0x05,0x00,0x0f,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_tru_f16_e64 s5, v255.l, v255.l
+// W32: v_cmp_t_f16_e64 s5, v255.l, v255.l      ; encoding: [0x05,0x00,0x0f,0xd4,0xff,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
 v_cmp_tru_f16_e64 s5, s1, s2
@@ -10208,12 +10492,12 @@ v_cmp_tru_f16_e64 ttmp15, -src_scc, |vcc_lo|
 // W32: v_cmp_t_f16_e64 ttmp15, -src_scc, |vcc_lo| ; encoding: [0x7b,0x02,0x0f,0xd4,0xfd,0xd4,0x00,0x20]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_t_f16_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_tru_f16_e64 s[10:11], v1.l, v2.l
+// W64: v_cmp_t_f16_e64 s[10:11], v1.l, v2.l    ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_tru_f16_e64 s[10:11], v255, v255
-// W64: v_cmp_t_f16_e64 s[10:11], v255, v255    ; encoding: [0x0a,0x00,0x0f,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_tru_f16_e64 s[10:11], v255.l, v255.l
+// W64: v_cmp_t_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x0f,0xd4,0xff,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
 v_cmp_tru_f16_e64 s[10:11], s1, s2
@@ -10267,6 +10551,22 @@ v_cmp_tru_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
 v_cmp_tru_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmp_t_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x0f,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+v_cmp_t_f16_e64 s5, v1.h, v2.l
+// W32: v_cmp_t_f16_e64 s5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0f,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 s5, v255.l, v255.h
+// W32: v_cmp_t_f16_e64 s5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0f,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 s[10:11], v1.h, v2.l
+// W64: v_cmp_t_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0f,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 s[10:11], v255.l, v255.h
+// W64: v_cmp_t_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0f,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
 v_cmp_tru_f32_e64 s5, v1, v2
 // W32: v_cmp_t_f32_e64 s5, v1, v2              ; encoding: [0x05,0x00,0x1f,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
@@ -10473,12 +10773,12 @@ v_cmp_tru_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
 v_cmp_tru_f64_e64 null, 0xaf123456, -|vcc| clamp
 // GFX11: v_cmp_t_f64_e64 null, 0xaf123456, -|vcc| clamp ; encoding: [0x7c,0x82,0x2f,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
 
-v_cmp_u_f16_e64 s5, v1, v2
-// W32: v_cmp_u_f16_e64 s5, v1, v2              ; encoding: [0x05,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_u_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_u_f16_e64 s5, v1.l, v2.l          ; encoding: [0x05,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_u_f16_e64 s5, v255, v255
-// W32: v_cmp_u_f16_e64 s5, v255, v255          ; encoding: [0x05,0x00,0x08,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_u_f16_e64 s5, v255.l, v255.l
+// W32: v_cmp_u_f16_e64 s5, v255.l, v255.l      ; encoding: [0x05,0x00,0x08,0xd4,0xff,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_u_f16_e64 s5, s1, s2
@@ -10529,12 +10829,12 @@ v_cmp_u_f16_e64 ttmp15, -src_scc, |vcc_lo|
 // W32: v_cmp_u_f16_e64 ttmp15, -src_scc, |vcc_lo| ; encoding: [0x7b,0x02,0x08,0xd4,0xfd,0xd4,0x00,0x20]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_u_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_u_f16_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_u_f16_e64 s[10:11], v1.l, v2.l
+// W64: v_cmp_u_f16_e64 s[10:11], v1.l, v2.l    ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_u_f16_e64 s[10:11], v255, v255
-// W64: v_cmp_u_f16_e64 s[10:11], v255, v255    ; encoding: [0x0a,0x00,0x08,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_u_f16_e64 s[10:11], v255.l, v255.l
+// W64: v_cmp_u_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x08,0xd4,0xff,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_u_f16_e64 s[10:11], s1, s2
@@ -10588,6 +10888,26 @@ v_cmp_u_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
 v_cmp_u_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmp_u_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x08,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+v_cmp_u_f16_e64 vcc_lo, 0.5, -m0
+// W32: v_cmp_u_f16_e64 vcc_lo, 0.5, -m0        ; encoding: [0x6a,0x00,0x08,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s5, v1.h, v2.l
+// W32: v_cmp_u_f16_e64 s5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x08,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s5, v255.l, v255.h
+// W32: v_cmp_u_f16_e64 s5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x08,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s[10:11], v1.h, v2.l
+// W64: v_cmp_u_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x08,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s[10:11], v255.l, v255.h
+// W64: v_cmp_u_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x08,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
 v_cmp_u_f32_e64 s5, v1, v2
 // W32: v_cmp_u_f32_e64 s5, v1, v2              ; encoding: [0x05,0x00,0x18,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc.s
index 61fa7cad3fea2..51af416dbbc55 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vopc.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc.s
@@ -4,124 +4,164 @@
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
 
-v_cmp_class_f16_e32 vcc_lo, v1, v2
-// W32: v_cmp_class_f16_e32 vcc_lo, v1, v2      ; encoding: [0x01,0x05,0xfa,0x7c]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, v1.l, v2.l  ; encoding: [0x01,0x05,0xfa,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v127, v2
-// W32: v_cmp_class_f16_e32 vcc_lo, v127, v2    ; encoding: [0x7f,0x05,0xfa,0x7c]
+v_cmp_class_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, v127.l, v2.l ; encoding: [0x7f,0x05,0xfa,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, s1, v2
-// W32: v_cmp_class_f16_e32 vcc_lo, s1, v2      ; encoding: [0x01,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, s1, v2.l    ; encoding: [0x01,0x04,0xfa,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, s105, v2
-// W32: v_cmp_class_f16_e32 vcc_lo, s105, v2    ; encoding: [0x69,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, s105, v2.l  ; encoding: [0x69,0x04,0xfa,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_class_f16_e32 vcc_lo, vcc_lo, v2  ; encoding: [0x6a,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, vcc_lo, v2.l ; encoding: [0x6a,0x04,0xfa,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_class_f16_e32 vcc_lo, vcc_hi, v2  ; encoding: [0x6b,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, vcc_hi, v2.l ; encoding: [0x6b,0x04,0xfa,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_class_f16_e32 vcc_lo, ttmp15, v2  ; encoding: [0x7b,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, ttmp15, v2.l ; encoding: [0x7b,0x04,0xfa,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, m0, v2
-// W32: v_cmp_class_f16_e32 vcc_lo, m0, v2      ; encoding: [0x7d,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, m0, v2.l    ; encoding: [0x7d,0x04,0xfa,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_class_f16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, exec_lo, v2.l ; encoding: [0x7e,0x04,0xfa,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_class_f16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, exec_hi, v2.l ; encoding: [0x7f,0x04,0xfa,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, null, v2
-// W32: v_cmp_class_f16_e32 vcc_lo, null, v2    ; encoding: [0x7c,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc_lo, null, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, null, v2.l  ; encoding: [0x7c,0x04,0xfa,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, -1, v2
-// W32: v_cmp_class_f16_e32 vcc_lo, -1, v2      ; encoding: [0xc1,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, -1, v2.l    ; encoding: [0xc1,0x04,0xfa,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_class_f16_e32 vcc_lo, 0.5, v2     ; encoding: [0xf0,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, 0.5, v2.l   ; encoding: [0xf0,0x04,0xfa,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_class_f16_e32 vcc_lo, src_scc, v2 ; encoding: [0xfd,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, src_scc, v2.l ; encoding: [0xfd,0x04,0xfa,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_class_f16_e32 vcc_lo, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_class_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_class_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2
-// W64: v_cmp_class_f16_e32 vcc, v1, v2         ; encoding: [0x01,0x05,0xfa,0x7c]
+v_cmp_class_f16 vcc, v1.l, v2.l
+// W64: v_cmp_class_f16_e32 vcc, v1.l, v2.l     ; encoding: [0x01,0x05,0xfa,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v127, v2
-// W64: v_cmp_class_f16_e32 vcc, v127, v2       ; encoding: [0x7f,0x05,0xfa,0x7c]
+v_cmp_class_f16 vcc, v127.l, v2.l
+// W64: v_cmp_class_f16_e32 vcc, v127.l, v2.l   ; encoding: [0x7f,0x05,0xfa,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, s1, v2
-// W64: v_cmp_class_f16_e32 vcc, s1, v2         ; encoding: [0x01,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc, s1, v2.l
+// W64: v_cmp_class_f16_e32 vcc, s1, v2.l       ; encoding: [0x01,0x04,0xfa,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, s105, v2
-// W64: v_cmp_class_f16_e32 vcc, s105, v2       ; encoding: [0x69,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc, s105, v2.l
+// W64: v_cmp_class_f16_e32 vcc, s105, v2.l     ; encoding: [0x69,0x04,0xfa,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, vcc_lo, v2
-// W64: v_cmp_class_f16_e32 vcc, vcc_lo, v2     ; encoding: [0x6a,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_class_f16_e32 vcc, vcc_lo, v2.l   ; encoding: [0x6a,0x04,0xfa,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, vcc_hi, v2
-// W64: v_cmp_class_f16_e32 vcc, vcc_hi, v2     ; encoding: [0x6b,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_class_f16_e32 vcc, vcc_hi, v2.l   ; encoding: [0x6b,0x04,0xfa,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, ttmp15, v2
-// W64: v_cmp_class_f16_e32 vcc, ttmp15, v2     ; encoding: [0x7b,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_class_f16_e32 vcc, ttmp15, v2.l   ; encoding: [0x7b,0x04,0xfa,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, m0, v2
-// W64: v_cmp_class_f16_e32 vcc, m0, v2         ; encoding: [0x7d,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc, m0, v2.l
+// W64: v_cmp_class_f16_e32 vcc, m0, v2.l       ; encoding: [0x7d,0x04,0xfa,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, exec_lo, v2
-// W64: v_cmp_class_f16_e32 vcc, exec_lo, v2    ; encoding: [0x7e,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_class_f16_e32 vcc, exec_lo, v2.l  ; encoding: [0x7e,0x04,0xfa,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, exec_hi, v2
-// W64: v_cmp_class_f16_e32 vcc, exec_hi, v2    ; encoding: [0x7f,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_class_f16_e32 vcc, exec_hi, v2.l  ; encoding: [0x7f,0x04,0xfa,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, null, v2
-// W64: v_cmp_class_f16_e32 vcc, null, v2       ; encoding: [0x7c,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc, null, v2.l
+// W64: v_cmp_class_f16_e32 vcc, null, v2.l     ; encoding: [0x7c,0x04,0xfa,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, -1, v2
-// W64: v_cmp_class_f16_e32 vcc, -1, v2         ; encoding: [0xc1,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc, -1, v2.l
+// W64: v_cmp_class_f16_e32 vcc, -1, v2.l       ; encoding: [0xc1,0x04,0xfa,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, 0.5, v2
-// W64: v_cmp_class_f16_e32 vcc, 0.5, v2        ; encoding: [0xf0,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc, 0.5, v2.l
+// W64: v_cmp_class_f16_e32 vcc, 0.5, v2.l      ; encoding: [0xf0,0x04,0xfa,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, src_scc, v2
-// W64: v_cmp_class_f16_e32 vcc, src_scc, v2    ; encoding: [0xfd,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc, src_scc, v2.l
+// W64: v_cmp_class_f16_e32 vcc, src_scc, v2.l  ; encoding: [0xfd,0x04,0xfa,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_class_f16_e32 vcc, 0xfe0b, v127   ; encoding: [0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_class_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_class_f16_e32 vcc, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1.h, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, v1.h, v2.l  ; encoding: [0x81,0x05,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1.h, v2.l
+// W64: v_cmp_class_f16_e32 vcc, v1.h, v2.l     ; encoding: [0x81,0x05,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v127.h, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, v127.h, v2.l ; encoding: [0xff,0x05,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v127.h, v2.l
+// W64: v_cmp_class_f16_e32 vcc, v127.h, v2.l   ; encoding: [0xff,0x05,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, 0.5, v127.l
+// W32: v_cmp_class_f16_e32 vcc_lo, 0.5, v127.l ; encoding: [0xf0,0xfe,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, 0.5, v127.l
+// W64: v_cmp_class_f16_e32 vcc, 0.5, v127.l    ; encoding: [0xf0,0xfe,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, src_scc, v2.h
+// W32: v_cmp_class_f16_e32 vcc_lo, src_scc, v2.h ; encoding: [0xfd,0x04,0xfb,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, src_scc, v2.h
+// W64: v_cmp_class_f16_e32 vcc, src_scc, v2.h  ; encoding: [0xfd,0x04,0xfb,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, 0xfe0b, v127.h
+// W32: v_cmp_class_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xfb,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, 0xfe0b, v127.h
+// W64: v_cmp_class_f16_e32 vcc, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xfb,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_class_f32 vcc_lo, v1, v2
@@ -340,124 +380,164 @@ v_cmp_class_f64 vcc, 0xaf123456, v255
 // W64: v_cmp_class_f64_e32 vcc, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x7c,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2
-// W32: v_cmp_eq_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x04,0x7c]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, v1.l, v2.l     ; encoding: [0x01,0x05,0x04,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v127, v2
-// W32: v_cmp_eq_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x04,0x7c]
+v_cmp_eq_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, v127.l, v2.l   ; encoding: [0x7f,0x05,0x04,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, s1, v2
-// W32: v_cmp_eq_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, s1, v2.l       ; encoding: [0x01,0x04,0x04,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, s105, v2
-// W32: v_cmp_eq_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, s105, v2.l     ; encoding: [0x69,0x04,0x04,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_eq_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, vcc_lo, v2.l   ; encoding: [0x6a,0x04,0x04,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_eq_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, vcc_hi, v2.l   ; encoding: [0x6b,0x04,0x04,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_eq_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, ttmp15, v2.l   ; encoding: [0x7b,0x04,0x04,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, m0, v2
-// W32: v_cmp_eq_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, m0, v2.l       ; encoding: [0x7d,0x04,0x04,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_eq_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, exec_lo, v2.l  ; encoding: [0x7e,0x04,0x04,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_eq_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, exec_hi, v2.l  ; encoding: [0x7f,0x04,0x04,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, null, v2
-// W32: v_cmp_eq_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc_lo, null, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, null, v2.l     ; encoding: [0x7c,0x04,0x04,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, -1, v2
-// W32: v_cmp_eq_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, -1, v2.l       ; encoding: [0xc1,0x04,0x04,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_eq_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, 0.5, v2.l      ; encoding: [0xf0,0x04,0x04,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_eq_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, src_scc, v2.l  ; encoding: [0xfd,0x04,0x04,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_eq_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_eq_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2
-// W64: v_cmp_eq_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x04,0x7c]
+v_cmp_eq_f16 vcc, v1.l, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, v1.l, v2.l        ; encoding: [0x01,0x05,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v127.l, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, v127.l, v2.l      ; encoding: [0x7f,0x05,0x04,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v127, v2
-// W64: v_cmp_eq_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x04,0x7c]
+v_cmp_eq_f16 vcc, s1, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, s1, v2.l          ; encoding: [0x01,0x04,0x04,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, s1, v2
-// W64: v_cmp_eq_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc, s105, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, s105, v2.l        ; encoding: [0x69,0x04,0x04,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, s105, v2
-// W64: v_cmp_eq_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, vcc_lo, v2.l      ; encoding: [0x6a,0x04,0x04,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, vcc_lo, v2
-// W64: v_cmp_eq_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, vcc_hi, v2.l      ; encoding: [0x6b,0x04,0x04,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, vcc_hi, v2
-// W64: v_cmp_eq_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, ttmp15, v2.l      ; encoding: [0x7b,0x04,0x04,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, ttmp15, v2
-// W64: v_cmp_eq_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc, m0, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, m0, v2.l          ; encoding: [0x7d,0x04,0x04,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, m0, v2
-// W64: v_cmp_eq_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, exec_lo, v2.l     ; encoding: [0x7e,0x04,0x04,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, exec_lo, v2
-// W64: v_cmp_eq_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, exec_hi, v2.l     ; encoding: [0x7f,0x04,0x04,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, exec_hi, v2
-// W64: v_cmp_eq_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc, null, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, null, v2.l        ; encoding: [0x7c,0x04,0x04,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, null, v2
-// W64: v_cmp_eq_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc, -1, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, -1, v2.l          ; encoding: [0xc1,0x04,0x04,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, -1, v2
-// W64: v_cmp_eq_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc, 0.5, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, 0.5, v2.l         ; encoding: [0xf0,0x04,0x04,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, 0.5, v2
-// W64: v_cmp_eq_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc, src_scc, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, src_scc, v2.l     ; encoding: [0xfd,0x04,0x04,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, src_scc, v2
-// W64: v_cmp_eq_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_eq_f16_e32 vcc, 0xfe0b, v127.l    ; encoding: [0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_eq_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_eq_f16 vcc_lo, v1.h, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, v1.h, v2.l     ; encoding: [0x81,0x05,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.h, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, v1.h, v2.l        ; encoding: [0x81,0x05,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v127.h, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, v127.h, v2.l   ; encoding: [0xff,0x05,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v127.h, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, v127.h, v2.l      ; encoding: [0xff,0x05,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, 0.5, v127.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, 0.5, v127.l    ; encoding: [0xf0,0xfe,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, 0.5, v127.l
+// W64: v_cmp_eq_f16_e32 vcc, 0.5, v127.l       ; encoding: [0xf0,0xfe,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, src_scc, v2.h
+// W32: v_cmp_eq_f16_e32 vcc_lo, src_scc, v2.h  ; encoding: [0xfd,0x04,0x05,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, src_scc, v2.h
+// W64: v_cmp_eq_f16_e32 vcc, src_scc, v2.h     ; encoding: [0xfd,0x04,0x05,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, 0xfe0b, v127.h
+// W32: v_cmp_eq_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x05,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, 0xfe0b, v127.h
+// W64: v_cmp_eq_f16_e32 vcc, 0xfe0b, v127.h    ; encoding: [0xff,0xfe,0x05,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_f32 vcc_lo, v1, v2
@@ -1444,124 +1524,164 @@ v_cmp_eq_u64 vcc, 0xaf123456, v[254:255]
 // W64: v_cmp_eq_u64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xb5,0x7c,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, v1, v2
-// W32: v_cmp_f_f16_e32 vcc_lo, v1, v2          ; encoding: [0x01,0x05,0x00,0x7c]
+v_cmp_f_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_f_f16_e32 vcc_lo, v1.l, v2.l      ; encoding: [0x01,0x05,0x00,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, v127, v2
-// W32: v_cmp_f_f16_e32 vcc_lo, v127, v2        ; encoding: [0x7f,0x05,0x00,0x7c]
+v_cmp_f_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_f_f16_e32 vcc_lo, v127.l, v2.l    ; encoding: [0x7f,0x05,0x00,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, s1, v2
-// W32: v_cmp_f_f16_e32 vcc_lo, s1, v2          ; encoding: [0x01,0x04,0x00,0x7c]
+v_cmp_f_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_f_f16_e32 vcc_lo, s1, v2.l        ; encoding: [0x01,0x04,0x00,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, s105, v2
-// W32: v_cmp_f_f16_e32 vcc_lo, s105, v2        ; encoding: [0x69,0x04,0x00,0x7c]
+v_cmp_f_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_f_f16_e32 vcc_lo, s105, v2.l      ; encoding: [0x69,0x04,0x00,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_f_f16_e32 vcc_lo, vcc_lo, v2      ; encoding: [0x6a,0x04,0x00,0x7c]
+v_cmp_f_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_f_f16_e32 vcc_lo, vcc_lo, v2.l    ; encoding: [0x6a,0x04,0x00,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_f_f16_e32 vcc_lo, vcc_hi, v2      ; encoding: [0x6b,0x04,0x00,0x7c]
+v_cmp_f_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_f_f16_e32 vcc_lo, vcc_hi, v2.l    ; encoding: [0x6b,0x04,0x00,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_f_f16_e32 vcc_lo, ttmp15, v2      ; encoding: [0x7b,0x04,0x00,0x7c]
+v_cmp_f_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_f_f16_e32 vcc_lo, ttmp15, v2.l    ; encoding: [0x7b,0x04,0x00,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, m0, v2
-// W32: v_cmp_f_f16_e32 vcc_lo, m0, v2          ; encoding: [0x7d,0x04,0x00,0x7c]
+v_cmp_f_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_f_f16_e32 vcc_lo, m0, v2.l        ; encoding: [0x7d,0x04,0x00,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_f_f16_e32 vcc_lo, exec_lo, v2     ; encoding: [0x7e,0x04,0x00,0x7c]
+v_cmp_f_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_f_f16_e32 vcc_lo, exec_lo, v2.l   ; encoding: [0x7e,0x04,0x00,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_f_f16_e32 vcc_lo, exec_hi, v2     ; encoding: [0x7f,0x04,0x00,0x7c]
+v_cmp_f_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_f_f16_e32 vcc_lo, exec_hi, v2.l   ; encoding: [0x7f,0x04,0x00,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, null, v2
-// W32: v_cmp_f_f16_e32 vcc_lo, null, v2        ; encoding: [0x7c,0x04,0x00,0x7c]
+v_cmp_f_f16 vcc_lo, null, v2.l
+// W32: v_cmp_f_f16_e32 vcc_lo, null, v2.l      ; encoding: [0x7c,0x04,0x00,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, -1, v2
-// W32: v_cmp_f_f16_e32 vcc_lo, -1, v2          ; encoding: [0xc1,0x04,0x00,0x7c]
+v_cmp_f_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_f_f16_e32 vcc_lo, -1, v2.l        ; encoding: [0xc1,0x04,0x00,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_f_f16_e32 vcc_lo, 0.5, v2         ; encoding: [0xf0,0x04,0x00,0x7c]
+v_cmp_f_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_f_f16_e32 vcc_lo, 0.5, v2.l       ; encoding: [0xf0,0x04,0x00,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_f_f16_e32 vcc_lo, src_scc, v2     ; encoding: [0xfd,0x04,0x00,0x7c]
+v_cmp_f_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_f_f16_e32 vcc_lo, src_scc, v2.l   ; encoding: [0xfd,0x04,0x00,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_f_f16_e32 vcc_lo, 0xfe0b, v127    ; encoding: [0xff,0xfe,0x00,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_f_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_f_f16_e32 vcc_lo, 0xfe0b, v127.l  ; encoding: [0xff,0xfe,0x00,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, v1, v2
-// W64: v_cmp_f_f16_e32 vcc, v1, v2             ; encoding: [0x01,0x05,0x00,0x7c]
+v_cmp_f_f16 vcc, v1.l, v2.l
+// W64: v_cmp_f_f16_e32 vcc, v1.l, v2.l         ; encoding: [0x01,0x05,0x00,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v127.l, v2.l
+// W64: v_cmp_f_f16_e32 vcc, v127.l, v2.l       ; encoding: [0x7f,0x05,0x00,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, v127, v2
-// W64: v_cmp_f_f16_e32 vcc, v127, v2           ; encoding: [0x7f,0x05,0x00,0x7c]
+v_cmp_f_f16 vcc, s1, v2.l
+// W64: v_cmp_f_f16_e32 vcc, s1, v2.l           ; encoding: [0x01,0x04,0x00,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, s1, v2
-// W64: v_cmp_f_f16_e32 vcc, s1, v2             ; encoding: [0x01,0x04,0x00,0x7c]
+v_cmp_f_f16 vcc, s105, v2.l
+// W64: v_cmp_f_f16_e32 vcc, s105, v2.l         ; encoding: [0x69,0x04,0x00,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, s105, v2
-// W64: v_cmp_f_f16_e32 vcc, s105, v2           ; encoding: [0x69,0x04,0x00,0x7c]
+v_cmp_f_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_f_f16_e32 vcc, vcc_lo, v2.l       ; encoding: [0x6a,0x04,0x00,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, vcc_lo, v2
-// W64: v_cmp_f_f16_e32 vcc, vcc_lo, v2         ; encoding: [0x6a,0x04,0x00,0x7c]
+v_cmp_f_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_f_f16_e32 vcc, vcc_hi, v2.l       ; encoding: [0x6b,0x04,0x00,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, vcc_hi, v2
-// W64: v_cmp_f_f16_e32 vcc, vcc_hi, v2         ; encoding: [0x6b,0x04,0x00,0x7c]
+v_cmp_f_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_f_f16_e32 vcc, ttmp15, v2.l       ; encoding: [0x7b,0x04,0x00,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, ttmp15, v2
-// W64: v_cmp_f_f16_e32 vcc, ttmp15, v2         ; encoding: [0x7b,0x04,0x00,0x7c]
+v_cmp_f_f16 vcc, m0, v2.l
+// W64: v_cmp_f_f16_e32 vcc, m0, v2.l           ; encoding: [0x7d,0x04,0x00,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, m0, v2
-// W64: v_cmp_f_f16_e32 vcc, m0, v2             ; encoding: [0x7d,0x04,0x00,0x7c]
+v_cmp_f_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_f_f16_e32 vcc, exec_lo, v2.l      ; encoding: [0x7e,0x04,0x00,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, exec_lo, v2
-// W64: v_cmp_f_f16_e32 vcc, exec_lo, v2        ; encoding: [0x7e,0x04,0x00,0x7c]
+v_cmp_f_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_f_f16_e32 vcc, exec_hi, v2.l      ; encoding: [0x7f,0x04,0x00,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, exec_hi, v2
-// W64: v_cmp_f_f16_e32 vcc, exec_hi, v2        ; encoding: [0x7f,0x04,0x00,0x7c]
+v_cmp_f_f16 vcc, null, v2.l
+// W64: v_cmp_f_f16_e32 vcc, null, v2.l         ; encoding: [0x7c,0x04,0x00,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, null, v2
-// W64: v_cmp_f_f16_e32 vcc, null, v2           ; encoding: [0x7c,0x04,0x00,0x7c]
+v_cmp_f_f16 vcc, -1, v2.l
+// W64: v_cmp_f_f16_e32 vcc, -1, v2.l           ; encoding: [0xc1,0x04,0x00,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, -1, v2
-// W64: v_cmp_f_f16_e32 vcc, -1, v2             ; encoding: [0xc1,0x04,0x00,0x7c]
+v_cmp_f_f16 vcc, 0.5, v2.l
+// W64: v_cmp_f_f16_e32 vcc, 0.5, v2.l          ; encoding: [0xf0,0x04,0x00,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, 0.5, v2
-// W64: v_cmp_f_f16_e32 vcc, 0.5, v2            ; encoding: [0xf0,0x04,0x00,0x7c]
+v_cmp_f_f16 vcc, src_scc, v2.l
+// W64: v_cmp_f_f16_e32 vcc, src_scc, v2.l      ; encoding: [0xfd,0x04,0x00,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, src_scc, v2
-// W64: v_cmp_f_f16_e32 vcc, src_scc, v2        ; encoding: [0xfd,0x04,0x00,0x7c]
+v_cmp_f_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_f_f16_e32 vcc, 0xfe0b, v127.l     ; encoding: [0xff,0xfe,0x00,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_f_f16_e32 vcc, 0xfe0b, v127       ; encoding: [0xff,0xfe,0x00,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_f_f16 vcc_lo, v1.h, v2.l
+// W32: v_cmp_f_f16_e32 vcc_lo, v1.h, v2.l      ; encoding: [0x81,0x05,0x00,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1.h, v2.l
+// W64: v_cmp_f_f16_e32 vcc, v1.h, v2.l         ; encoding: [0x81,0x05,0x00,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, v127.h, v2.l
+// W32: v_cmp_f_f16_e32 vcc_lo, v127.h, v2.l    ; encoding: [0xff,0x05,0x00,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v127.h, v2.l
+// W64: v_cmp_f_f16_e32 vcc, v127.h, v2.l       ; encoding: [0xff,0x05,0x00,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, 0.5, v127.l
+// W32: v_cmp_f_f16_e32 vcc_lo, 0.5, v127.l     ; encoding: [0xf0,0xfe,0x00,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, 0.5, v127.l
+// W64: v_cmp_f_f16_e32 vcc, 0.5, v127.l        ; encoding: [0xf0,0xfe,0x00,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, src_scc, v2.h
+// W32: v_cmp_f_f16_e32 vcc_lo, src_scc, v2.h   ; encoding: [0xfd,0x04,0x01,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, src_scc, v2.h
+// W64: v_cmp_f_f16_e32 vcc, src_scc, v2.h      ; encoding: [0xfd,0x04,0x01,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, 0xfe0b, v127.h
+// W32: v_cmp_f_f16_e32 vcc_lo, 0xfe0b, v127.h  ; encoding: [0xff,0xfe,0x01,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, 0xfe0b, v127.h
+// W64: v_cmp_f_f16_e32 vcc, 0xfe0b, v127.h     ; encoding: [0xff,0xfe,0x01,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_f_f32 vcc_lo, v1, v2
@@ -2212,124 +2332,164 @@ v_cmp_f_u64 vcc, 0xaf123456, v[254:255]
 // W64: v_cmp_f_u64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xb1,0x7c,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2
-// W32: v_cmp_ge_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x0c,0x7c]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, v1.l, v2.l     ; encoding: [0x01,0x05,0x0c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v127, v2
-// W32: v_cmp_ge_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x0c,0x7c]
+v_cmp_ge_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, v127.l, v2.l   ; encoding: [0x7f,0x05,0x0c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, s1, v2
-// W32: v_cmp_ge_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, s1, v2.l       ; encoding: [0x01,0x04,0x0c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, s105, v2
-// W32: v_cmp_ge_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, s105, v2.l     ; encoding: [0x69,0x04,0x0c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_ge_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, vcc_lo, v2.l   ; encoding: [0x6a,0x04,0x0c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_ge_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, vcc_hi, v2.l   ; encoding: [0x6b,0x04,0x0c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_ge_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, ttmp15, v2.l   ; encoding: [0x7b,0x04,0x0c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, m0, v2
-// W32: v_cmp_ge_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, m0, v2.l       ; encoding: [0x7d,0x04,0x0c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_ge_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, exec_lo, v2.l  ; encoding: [0x7e,0x04,0x0c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_ge_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, exec_hi, v2.l  ; encoding: [0x7f,0x04,0x0c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, null, v2
-// W32: v_cmp_ge_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc_lo, null, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, null, v2.l     ; encoding: [0x7c,0x04,0x0c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, -1, v2
-// W32: v_cmp_ge_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, -1, v2.l       ; encoding: [0xc1,0x04,0x0c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_ge_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, 0.5, v2.l      ; encoding: [0xf0,0x04,0x0c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_ge_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, src_scc, v2.l  ; encoding: [0xfd,0x04,0x0c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_ge_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_ge_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2
-// W64: v_cmp_ge_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x0c,0x7c]
+v_cmp_ge_f16 vcc, v1.l, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, v1.l, v2.l        ; encoding: [0x01,0x05,0x0c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v127, v2
-// W64: v_cmp_ge_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x0c,0x7c]
+v_cmp_ge_f16 vcc, v127.l, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, v127.l, v2.l      ; encoding: [0x7f,0x05,0x0c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, s1, v2
-// W64: v_cmp_ge_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc, s1, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, s1, v2.l          ; encoding: [0x01,0x04,0x0c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, s105, v2
-// W64: v_cmp_ge_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc, s105, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, s105, v2.l        ; encoding: [0x69,0x04,0x0c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, vcc_lo, v2
-// W64: v_cmp_ge_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, vcc_lo, v2.l      ; encoding: [0x6a,0x04,0x0c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, vcc_hi, v2
-// W64: v_cmp_ge_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, vcc_hi, v2.l      ; encoding: [0x6b,0x04,0x0c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, ttmp15, v2
-// W64: v_cmp_ge_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, ttmp15, v2.l      ; encoding: [0x7b,0x04,0x0c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, m0, v2
-// W64: v_cmp_ge_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc, m0, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, m0, v2.l          ; encoding: [0x7d,0x04,0x0c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, exec_lo, v2
-// W64: v_cmp_ge_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, exec_lo, v2.l     ; encoding: [0x7e,0x04,0x0c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, exec_hi, v2
-// W64: v_cmp_ge_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, exec_hi, v2.l     ; encoding: [0x7f,0x04,0x0c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, null, v2
-// W64: v_cmp_ge_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc, null, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, null, v2.l        ; encoding: [0x7c,0x04,0x0c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, -1, v2
-// W64: v_cmp_ge_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc, -1, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, -1, v2.l          ; encoding: [0xc1,0x04,0x0c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, 0.5, v2
-// W64: v_cmp_ge_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc, 0.5, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, 0.5, v2.l         ; encoding: [0xf0,0x04,0x0c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, src_scc, v2
-// W64: v_cmp_ge_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc, src_scc, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, src_scc, v2.l     ; encoding: [0xfd,0x04,0x0c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_ge_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_ge_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_ge_f16_e32 vcc, 0xfe0b, v127.l    ; encoding: [0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1.h, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, v1.h, v2.l     ; encoding: [0x81,0x05,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.h, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, v1.h, v2.l        ; encoding: [0x81,0x05,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v127.h, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, v127.h, v2.l   ; encoding: [0xff,0x05,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v127.h, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, v127.h, v2.l      ; encoding: [0xff,0x05,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, 0.5, v127.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, 0.5, v127.l    ; encoding: [0xf0,0xfe,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, 0.5, v127.l
+// W64: v_cmp_ge_f16_e32 vcc, 0.5, v127.l       ; encoding: [0xf0,0xfe,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, src_scc, v2.h
+// W32: v_cmp_ge_f16_e32 vcc_lo, src_scc, v2.h  ; encoding: [0xfd,0x04,0x0d,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, src_scc, v2.h
+// W64: v_cmp_ge_f16_e32 vcc, src_scc, v2.h     ; encoding: [0xfd,0x04,0x0d,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, 0xfe0b, v127.h
+// W32: v_cmp_ge_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x0d,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, 0xfe0b, v127.h
+// W64: v_cmp_ge_f16_e32 vcc, 0xfe0b, v127.h    ; encoding: [0xff,0xfe,0x0d,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_f32 vcc_lo, v1, v2
@@ -3316,124 +3476,164 @@ v_cmp_ge_u64 vcc, 0xaf123456, v[254:255]
 // W64: v_cmp_ge_u64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xbd,0x7c,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2
-// W32: v_cmp_gt_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, v1.l, v2.l     ; encoding: [0x01,0x05,0x08,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v127, v2
-// W32: v_cmp_gt_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, v127.l, v2.l   ; encoding: [0x7f,0x05,0x08,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, s1, v2
-// W32: v_cmp_gt_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, s1, v2.l       ; encoding: [0x01,0x04,0x08,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, s105, v2
-// W32: v_cmp_gt_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, s105, v2.l     ; encoding: [0x69,0x04,0x08,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_gt_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, vcc_lo, v2.l   ; encoding: [0x6a,0x04,0x08,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_gt_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, vcc_hi, v2.l   ; encoding: [0x6b,0x04,0x08,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_gt_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, ttmp15, v2.l   ; encoding: [0x7b,0x04,0x08,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, m0, v2
-// W32: v_cmp_gt_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, m0, v2.l       ; encoding: [0x7d,0x04,0x08,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_gt_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, exec_lo, v2.l  ; encoding: [0x7e,0x04,0x08,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_gt_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, exec_hi, v2.l  ; encoding: [0x7f,0x04,0x08,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, null, v2
-// W32: v_cmp_gt_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, null, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, null, v2.l     ; encoding: [0x7c,0x04,0x08,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, -1, v2
-// W32: v_cmp_gt_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, -1, v2.l       ; encoding: [0xc1,0x04,0x08,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_gt_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, 0.5, v2.l      ; encoding: [0xf0,0x04,0x08,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_gt_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, src_scc, v2.l  ; encoding: [0xfd,0x04,0x08,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_gt_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_gt_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2
-// W64: v_cmp_gt_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x08,0x7c]
+v_cmp_gt_f16 vcc, v1.l, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, v1.l, v2.l        ; encoding: [0x01,0x05,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v127.l, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, v127.l, v2.l      ; encoding: [0x7f,0x05,0x08,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v127, v2
-// W64: v_cmp_gt_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x08,0x7c]
+v_cmp_gt_f16 vcc, s1, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, s1, v2.l          ; encoding: [0x01,0x04,0x08,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, s1, v2
-// W64: v_cmp_gt_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc, s105, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, s105, v2.l        ; encoding: [0x69,0x04,0x08,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, s105, v2
-// W64: v_cmp_gt_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, vcc_lo, v2.l      ; encoding: [0x6a,0x04,0x08,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, vcc_lo, v2
-// W64: v_cmp_gt_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, vcc_hi, v2.l      ; encoding: [0x6b,0x04,0x08,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, vcc_hi, v2
-// W64: v_cmp_gt_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, ttmp15, v2.l      ; encoding: [0x7b,0x04,0x08,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, ttmp15, v2
-// W64: v_cmp_gt_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc, m0, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, m0, v2.l          ; encoding: [0x7d,0x04,0x08,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, m0, v2
-// W64: v_cmp_gt_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, exec_lo, v2.l     ; encoding: [0x7e,0x04,0x08,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, exec_lo, v2
-// W64: v_cmp_gt_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, exec_hi, v2.l     ; encoding: [0x7f,0x04,0x08,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, exec_hi, v2
-// W64: v_cmp_gt_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc, null, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, null, v2.l        ; encoding: [0x7c,0x04,0x08,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, null, v2
-// W64: v_cmp_gt_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc, -1, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, -1, v2.l          ; encoding: [0xc1,0x04,0x08,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, -1, v2
-// W64: v_cmp_gt_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc, 0.5, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, 0.5, v2.l         ; encoding: [0xf0,0x04,0x08,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, 0.5, v2
-// W64: v_cmp_gt_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc, src_scc, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, src_scc, v2.l     ; encoding: [0xfd,0x04,0x08,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, src_scc, v2
-// W64: v_cmp_gt_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_gt_f16_e32 vcc, 0xfe0b, v127.l    ; encoding: [0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_gt_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_gt_f16 vcc_lo, v1.h, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, v1.h, v2.l     ; encoding: [0x81,0x05,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1.h, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, v1.h, v2.l        ; encoding: [0x81,0x05,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v127.h, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, v127.h, v2.l   ; encoding: [0xff,0x05,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v127.h, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, v127.h, v2.l      ; encoding: [0xff,0x05,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, 0.5, v127.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, 0.5, v127.l    ; encoding: [0xf0,0xfe,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, 0.5, v127.l
+// W64: v_cmp_gt_f16_e32 vcc, 0.5, v127.l       ; encoding: [0xf0,0xfe,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, src_scc, v2.h
+// W32: v_cmp_gt_f16_e32 vcc_lo, src_scc, v2.h  ; encoding: [0xfd,0x04,0x09,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, src_scc, v2.h
+// W64: v_cmp_gt_f16_e32 vcc, src_scc, v2.h     ; encoding: [0xfd,0x04,0x09,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, 0xfe0b, v127.h
+// W32: v_cmp_gt_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x09,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, 0xfe0b, v127.h
+// W64: v_cmp_gt_f16_e32 vcc, 0xfe0b, v127.h    ; encoding: [0xff,0xfe,0x09,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_f32 vcc_lo, v1, v2
@@ -4420,124 +4620,164 @@ v_cmp_gt_u64 vcc, 0xaf123456, v[254:255]
 // W64: v_cmp_gt_u64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xb9,0x7c,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2
-// W32: v_cmp_le_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x06,0x7c]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, v1.l, v2.l     ; encoding: [0x01,0x05,0x06,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v127, v2
-// W32: v_cmp_le_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x06,0x7c]
+v_cmp_le_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, v127.l, v2.l   ; encoding: [0x7f,0x05,0x06,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, s1, v2
-// W32: v_cmp_le_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, s1, v2.l       ; encoding: [0x01,0x04,0x06,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, s105, v2
-// W32: v_cmp_le_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, s105, v2.l     ; encoding: [0x69,0x04,0x06,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_le_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, vcc_lo, v2.l   ; encoding: [0x6a,0x04,0x06,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_le_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, vcc_hi, v2.l   ; encoding: [0x6b,0x04,0x06,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_le_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, ttmp15, v2.l   ; encoding: [0x7b,0x04,0x06,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, m0, v2
-// W32: v_cmp_le_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, m0, v2.l       ; encoding: [0x7d,0x04,0x06,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_le_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, exec_lo, v2.l  ; encoding: [0x7e,0x04,0x06,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_le_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, exec_hi, v2.l  ; encoding: [0x7f,0x04,0x06,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, null, v2
-// W32: v_cmp_le_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc_lo, null, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, null, v2.l     ; encoding: [0x7c,0x04,0x06,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, -1, v2
-// W32: v_cmp_le_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, -1, v2.l       ; encoding: [0xc1,0x04,0x06,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_le_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, 0.5, v2.l      ; encoding: [0xf0,0x04,0x06,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_le_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, src_scc, v2.l  ; encoding: [0xfd,0x04,0x06,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_le_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_le_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_le_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2
-// W64: v_cmp_le_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x06,0x7c]
+v_cmp_le_f16 vcc, v1.l, v2.l
+// W64: v_cmp_le_f16_e32 vcc, v1.l, v2.l        ; encoding: [0x01,0x05,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v127.l, v2.l
+// W64: v_cmp_le_f16_e32 vcc, v127.l, v2.l      ; encoding: [0x7f,0x05,0x06,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v127, v2
-// W64: v_cmp_le_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x06,0x7c]
+v_cmp_le_f16 vcc, s1, v2.l
+// W64: v_cmp_le_f16_e32 vcc, s1, v2.l          ; encoding: [0x01,0x04,0x06,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, s1, v2
-// W64: v_cmp_le_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc, s105, v2.l
+// W64: v_cmp_le_f16_e32 vcc, s105, v2.l        ; encoding: [0x69,0x04,0x06,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, s105, v2
-// W64: v_cmp_le_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_le_f16_e32 vcc, vcc_lo, v2.l      ; encoding: [0x6a,0x04,0x06,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, vcc_lo, v2
-// W64: v_cmp_le_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_le_f16_e32 vcc, vcc_hi, v2.l      ; encoding: [0x6b,0x04,0x06,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, vcc_hi, v2
-// W64: v_cmp_le_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_le_f16_e32 vcc, ttmp15, v2.l      ; encoding: [0x7b,0x04,0x06,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, ttmp15, v2
-// W64: v_cmp_le_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc, m0, v2.l
+// W64: v_cmp_le_f16_e32 vcc, m0, v2.l          ; encoding: [0x7d,0x04,0x06,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, m0, v2
-// W64: v_cmp_le_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_le_f16_e32 vcc, exec_lo, v2.l     ; encoding: [0x7e,0x04,0x06,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, exec_lo, v2
-// W64: v_cmp_le_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_le_f16_e32 vcc, exec_hi, v2.l     ; encoding: [0x7f,0x04,0x06,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, exec_hi, v2
-// W64: v_cmp_le_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc, null, v2.l
+// W64: v_cmp_le_f16_e32 vcc, null, v2.l        ; encoding: [0x7c,0x04,0x06,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, null, v2
-// W64: v_cmp_le_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc, -1, v2.l
+// W64: v_cmp_le_f16_e32 vcc, -1, v2.l          ; encoding: [0xc1,0x04,0x06,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, -1, v2
-// W64: v_cmp_le_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc, 0.5, v2.l
+// W64: v_cmp_le_f16_e32 vcc, 0.5, v2.l         ; encoding: [0xf0,0x04,0x06,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, 0.5, v2
-// W64: v_cmp_le_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc, src_scc, v2.l
+// W64: v_cmp_le_f16_e32 vcc, src_scc, v2.l     ; encoding: [0xfd,0x04,0x06,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, src_scc, v2
-// W64: v_cmp_le_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_le_f16_e32 vcc, 0xfe0b, v127.l    ; encoding: [0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_le_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_le_f16 vcc_lo, v1.h, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, v1.h, v2.l     ; encoding: [0x81,0x05,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.h, v2.l
+// W64: v_cmp_le_f16_e32 vcc, v1.h, v2.l        ; encoding: [0x81,0x05,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v127.h, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, v127.h, v2.l   ; encoding: [0xff,0x05,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v127.h, v2.l
+// W64: v_cmp_le_f16_e32 vcc, v127.h, v2.l      ; encoding: [0xff,0x05,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, 0.5, v127.l
+// W32: v_cmp_le_f16_e32 vcc_lo, 0.5, v127.l    ; encoding: [0xf0,0xfe,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, 0.5, v127.l
+// W64: v_cmp_le_f16_e32 vcc, 0.5, v127.l       ; encoding: [0xf0,0xfe,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, src_scc, v2.h
+// W32: v_cmp_le_f16_e32 vcc_lo, src_scc, v2.h  ; encoding: [0xfd,0x04,0x07,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, src_scc, v2.h
+// W64: v_cmp_le_f16_e32 vcc, src_scc, v2.h     ; encoding: [0xfd,0x04,0x07,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, 0xfe0b, v127.h
+// W32: v_cmp_le_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x07,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, 0xfe0b, v127.h
+// W64: v_cmp_le_f16_e32 vcc, 0xfe0b, v127.h    ; encoding: [0xff,0xfe,0x07,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_f32 vcc_lo, v1, v2
@@ -5524,124 +5764,164 @@ v_cmp_le_u64 vcc, 0xaf123456, v[254:255]
 // W64: v_cmp_le_u64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xb7,0x7c,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2
-// W32: v_cmp_lg_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, v1.l, v2.l     ; encoding: [0x01,0x05,0x0a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v127, v2
-// W32: v_cmp_lg_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, v127.l, v2.l   ; encoding: [0x7f,0x05,0x0a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, s1, v2
-// W32: v_cmp_lg_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, s1, v2.l       ; encoding: [0x01,0x04,0x0a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, s105, v2
-// W32: v_cmp_lg_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, s105, v2.l     ; encoding: [0x69,0x04,0x0a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_lg_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, vcc_lo, v2.l   ; encoding: [0x6a,0x04,0x0a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_lg_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, vcc_hi, v2.l   ; encoding: [0x6b,0x04,0x0a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_lg_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, ttmp15, v2.l   ; encoding: [0x7b,0x04,0x0a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, m0, v2
-// W32: v_cmp_lg_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, m0, v2.l       ; encoding: [0x7d,0x04,0x0a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_lg_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, exec_lo, v2.l  ; encoding: [0x7e,0x04,0x0a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_lg_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, exec_hi, v2.l  ; encoding: [0x7f,0x04,0x0a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, null, v2
-// W32: v_cmp_lg_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, null, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, null, v2.l     ; encoding: [0x7c,0x04,0x0a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, -1, v2
-// W32: v_cmp_lg_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, -1, v2.l       ; encoding: [0xc1,0x04,0x0a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_lg_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, 0.5, v2.l      ; encoding: [0xf0,0x04,0x0a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_lg_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, src_scc, v2.l  ; encoding: [0xfd,0x04,0x0a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_lg_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_lg_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2
-// W64: v_cmp_lg_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x0a,0x7c]
+v_cmp_lg_f16 vcc, v1.l, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, v1.l, v2.l        ; encoding: [0x01,0x05,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v127.l, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, v127.l, v2.l      ; encoding: [0x7f,0x05,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, s1, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, s1, v2.l          ; encoding: [0x01,0x04,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, s105, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, s105, v2.l        ; encoding: [0x69,0x04,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, vcc_lo, v2.l      ; encoding: [0x6a,0x04,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, vcc_hi, v2.l      ; encoding: [0x6b,0x04,0x0a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v127, v2
-// W64: v_cmp_lg_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x0a,0x7c]
+v_cmp_lg_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, ttmp15, v2.l      ; encoding: [0x7b,0x04,0x0a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, s1, v2
-// W64: v_cmp_lg_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc, m0, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, m0, v2.l          ; encoding: [0x7d,0x04,0x0a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, s105, v2
-// W64: v_cmp_lg_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, exec_lo, v2.l     ; encoding: [0x7e,0x04,0x0a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, vcc_lo, v2
-// W64: v_cmp_lg_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, exec_hi, v2.l     ; encoding: [0x7f,0x04,0x0a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, vcc_hi, v2
-// W64: v_cmp_lg_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc, null, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, null, v2.l        ; encoding: [0x7c,0x04,0x0a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, ttmp15, v2
-// W64: v_cmp_lg_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc, -1, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, -1, v2.l          ; encoding: [0xc1,0x04,0x0a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, m0, v2
-// W64: v_cmp_lg_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc, 0.5, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, 0.5, v2.l         ; encoding: [0xf0,0x04,0x0a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, exec_lo, v2
-// W64: v_cmp_lg_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc, src_scc, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, src_scc, v2.l     ; encoding: [0xfd,0x04,0x0a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, exec_hi, v2
-// W64: v_cmp_lg_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_lg_f16_e32 vcc, 0xfe0b, v127.l    ; encoding: [0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, null, v2
-// W64: v_cmp_lg_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, v1.h, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, v1.h, v2.l     ; encoding: [0x81,0x05,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.h, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, v1.h, v2.l        ; encoding: [0x81,0x05,0x0a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, -1, v2
-// W64: v_cmp_lg_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, v127.h, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, v127.h, v2.l   ; encoding: [0xff,0x05,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v127.h, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, v127.h, v2.l      ; encoding: [0xff,0x05,0x0a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, 0.5, v2
-// W64: v_cmp_lg_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, 0.5, v127.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, 0.5, v127.l    ; encoding: [0xf0,0xfe,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, 0.5, v127.l
+// W64: v_cmp_lg_f16_e32 vcc, 0.5, v127.l       ; encoding: [0xf0,0xfe,0x0a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, src_scc, v2
-// W64: v_cmp_lg_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, src_scc, v2.h
+// W32: v_cmp_lg_f16_e32 vcc_lo, src_scc, v2.h  ; encoding: [0xfd,0x04,0x0b,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, src_scc, v2.h
+// W64: v_cmp_lg_f16_e32 vcc, src_scc, v2.h     ; encoding: [0xfd,0x04,0x0b,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_lg_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_lg_f16 vcc_lo, 0xfe0b, v127.h
+// W32: v_cmp_lg_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x0b,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, 0xfe0b, v127.h
+// W64: v_cmp_lg_f16_e32 vcc, 0xfe0b, v127.h    ; encoding: [0xff,0xfe,0x0b,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lg_f32 vcc_lo, v1, v2
@@ -7772,124 +8052,164 @@ v_cmp_ne_u64 vcc, 0xaf123456, v[254:255]
 // W64: v_cmp_ne_u64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xbb,0x7c,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2
-// W32: v_cmp_neq_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x1a,0x7c]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, v1.l, v2.l    ; encoding: [0x01,0x05,0x1a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v127, v2
-// W32: v_cmp_neq_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x1a,0x7c]
+v_cmp_neq_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, v127.l, v2.l  ; encoding: [0x7f,0x05,0x1a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, s1, v2
-// W32: v_cmp_neq_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, s1, v2.l      ; encoding: [0x01,0x04,0x1a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, s105, v2
-// W32: v_cmp_neq_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, s105, v2.l    ; encoding: [0x69,0x04,0x1a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_neq_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, vcc_lo, v2.l  ; encoding: [0x6a,0x04,0x1a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_neq_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, vcc_hi, v2.l  ; encoding: [0x6b,0x04,0x1a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_neq_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, ttmp15, v2.l  ; encoding: [0x7b,0x04,0x1a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, m0, v2
-// W32: v_cmp_neq_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, m0, v2.l      ; encoding: [0x7d,0x04,0x1a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_neq_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, exec_lo, v2.l ; encoding: [0x7e,0x04,0x1a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_neq_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, exec_hi, v2.l ; encoding: [0x7f,0x04,0x1a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, null, v2
-// W32: v_cmp_neq_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc_lo, null, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, null, v2.l    ; encoding: [0x7c,0x04,0x1a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, -1, v2
-// W32: v_cmp_neq_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, -1, v2.l      ; encoding: [0xc1,0x04,0x1a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_neq_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, 0.5, v2.l     ; encoding: [0xf0,0x04,0x1a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_neq_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, src_scc, v2.l ; encoding: [0xfd,0x04,0x1a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_neq_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_neq_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2
-// W64: v_cmp_neq_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x1a,0x7c]
+v_cmp_neq_f16 vcc, v1.l, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, v1.l, v2.l       ; encoding: [0x01,0x05,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v127.l, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, v127.l, v2.l     ; encoding: [0x7f,0x05,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, s1, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, s1, v2.l         ; encoding: [0x01,0x04,0x1a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v127, v2
-// W64: v_cmp_neq_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x1a,0x7c]
+v_cmp_neq_f16 vcc, s105, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, s105, v2.l       ; encoding: [0x69,0x04,0x1a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, s1, v2
-// W64: v_cmp_neq_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, vcc_lo, v2.l     ; encoding: [0x6a,0x04,0x1a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, s105, v2
-// W64: v_cmp_neq_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, vcc_hi, v2.l     ; encoding: [0x6b,0x04,0x1a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, vcc_lo, v2
-// W64: v_cmp_neq_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, ttmp15, v2.l     ; encoding: [0x7b,0x04,0x1a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, vcc_hi, v2
-// W64: v_cmp_neq_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc, m0, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, m0, v2.l         ; encoding: [0x7d,0x04,0x1a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, ttmp15, v2
-// W64: v_cmp_neq_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, exec_lo, v2.l    ; encoding: [0x7e,0x04,0x1a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, m0, v2
-// W64: v_cmp_neq_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, exec_hi, v2.l    ; encoding: [0x7f,0x04,0x1a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, exec_lo, v2
-// W64: v_cmp_neq_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc, null, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, null, v2.l       ; encoding: [0x7c,0x04,0x1a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, exec_hi, v2
-// W64: v_cmp_neq_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc, -1, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, -1, v2.l         ; encoding: [0xc1,0x04,0x1a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, null, v2
-// W64: v_cmp_neq_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc, 0.5, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, 0.5, v2.l        ; encoding: [0xf0,0x04,0x1a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, -1, v2
-// W64: v_cmp_neq_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc, src_scc, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, src_scc, v2.l    ; encoding: [0xfd,0x04,0x1a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, 0.5, v2
-// W64: v_cmp_neq_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_neq_f16_e32 vcc, 0xfe0b, v127.l   ; encoding: [0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, src_scc, v2
-// W64: v_cmp_neq_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc_lo, v1.h, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, v1.h, v2.l    ; encoding: [0x81,0x05,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.h, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, v1.h, v2.l       ; encoding: [0x81,0x05,0x1a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_neq_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_neq_f16 vcc_lo, v127.h, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, v127.h, v2.l  ; encoding: [0xff,0x05,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v127.h, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, v127.h, v2.l     ; encoding: [0xff,0x05,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, 0.5, v127.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, 0.5, v127.l   ; encoding: [0xf0,0xfe,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, 0.5, v127.l
+// W64: v_cmp_neq_f16_e32 vcc, 0.5, v127.l      ; encoding: [0xf0,0xfe,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, src_scc, v2.h
+// W32: v_cmp_neq_f16_e32 vcc_lo, src_scc, v2.h ; encoding: [0xfd,0x04,0x1b,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, src_scc, v2.h
+// W64: v_cmp_neq_f16_e32 vcc, src_scc, v2.h    ; encoding: [0xfd,0x04,0x1b,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, 0xfe0b, v127.h
+// W32: v_cmp_neq_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x1b,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, 0xfe0b, v127.h
+// W64: v_cmp_neq_f16_e32 vcc, 0xfe0b, v127.h   ; encoding: [0xff,0xfe,0x1b,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f32 vcc_lo, v1, v2
@@ -8108,124 +8428,164 @@ v_cmp_neq_f64 vcc, 0xaf123456, v[254:255]
 // W64: v_cmp_neq_f64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x5b,0x7c,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2
-// W32: v_cmp_nge_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, v1.l, v2.l    ; encoding: [0x01,0x05,0x12,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v127, v2
-// W32: v_cmp_nge_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, v127.l, v2.l  ; encoding: [0x7f,0x05,0x12,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, s1, v2
-// W32: v_cmp_nge_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, s1, v2.l      ; encoding: [0x01,0x04,0x12,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, s105, v2
-// W32: v_cmp_nge_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, s105, v2.l    ; encoding: [0x69,0x04,0x12,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_nge_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, vcc_lo, v2.l  ; encoding: [0x6a,0x04,0x12,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_nge_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, vcc_hi, v2.l  ; encoding: [0x6b,0x04,0x12,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_nge_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, ttmp15, v2.l  ; encoding: [0x7b,0x04,0x12,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, m0, v2
-// W32: v_cmp_nge_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, m0, v2.l      ; encoding: [0x7d,0x04,0x12,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_nge_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, exec_lo, v2.l ; encoding: [0x7e,0x04,0x12,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_nge_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, exec_hi, v2.l ; encoding: [0x7f,0x04,0x12,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, null, v2
-// W32: v_cmp_nge_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, null, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, null, v2.l    ; encoding: [0x7c,0x04,0x12,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, -1, v2
-// W32: v_cmp_nge_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, -1, v2.l      ; encoding: [0xc1,0x04,0x12,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_nge_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, 0.5, v2.l     ; encoding: [0xf0,0x04,0x12,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_nge_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, src_scc, v2.l ; encoding: [0xfd,0x04,0x12,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_nge_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_nge_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2
-// W64: v_cmp_nge_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x12,0x7c]
+v_cmp_nge_f16 vcc, v1.l, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, v1.l, v2.l       ; encoding: [0x01,0x05,0x12,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v127, v2
-// W64: v_cmp_nge_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x12,0x7c]
+v_cmp_nge_f16 vcc, v127.l, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, v127.l, v2.l     ; encoding: [0x7f,0x05,0x12,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, s1, v2
-// W64: v_cmp_nge_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc, s1, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, s1, v2.l         ; encoding: [0x01,0x04,0x12,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, s105, v2
-// W64: v_cmp_nge_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc, s105, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, s105, v2.l       ; encoding: [0x69,0x04,0x12,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, vcc_lo, v2
-// W64: v_cmp_nge_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, vcc_lo, v2.l     ; encoding: [0x6a,0x04,0x12,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, vcc_hi, v2
-// W64: v_cmp_nge_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, vcc_hi, v2.l     ; encoding: [0x6b,0x04,0x12,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, ttmp15, v2
-// W64: v_cmp_nge_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, ttmp15, v2.l     ; encoding: [0x7b,0x04,0x12,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, m0, v2
-// W64: v_cmp_nge_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc, m0, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, m0, v2.l         ; encoding: [0x7d,0x04,0x12,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, exec_lo, v2
-// W64: v_cmp_nge_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, exec_lo, v2.l    ; encoding: [0x7e,0x04,0x12,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, exec_hi, v2
-// W64: v_cmp_nge_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, exec_hi, v2.l    ; encoding: [0x7f,0x04,0x12,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, null, v2
-// W64: v_cmp_nge_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc, null, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, null, v2.l       ; encoding: [0x7c,0x04,0x12,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, -1, v2
-// W64: v_cmp_nge_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc, -1, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, -1, v2.l         ; encoding: [0xc1,0x04,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, 0.5, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, 0.5, v2.l        ; encoding: [0xf0,0x04,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, src_scc, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, src_scc, v2.l    ; encoding: [0xfd,0x04,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_nge_f16_e32 vcc, 0xfe0b, v127.l   ; encoding: [0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1.h, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, v1.h, v2.l    ; encoding: [0x81,0x05,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1.h, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, v1.h, v2.l       ; encoding: [0x81,0x05,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v127.h, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, v127.h, v2.l  ; encoding: [0xff,0x05,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v127.h, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, v127.h, v2.l     ; encoding: [0xff,0x05,0x12,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, 0.5, v2
-// W64: v_cmp_nge_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, 0.5, v127.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, 0.5, v127.l   ; encoding: [0xf0,0xfe,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, 0.5, v127.l
+// W64: v_cmp_nge_f16_e32 vcc, 0.5, v127.l      ; encoding: [0xf0,0xfe,0x12,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, src_scc, v2
-// W64: v_cmp_nge_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, src_scc, v2.h
+// W32: v_cmp_nge_f16_e32 vcc_lo, src_scc, v2.h ; encoding: [0xfd,0x04,0x13,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, src_scc, v2.h
+// W64: v_cmp_nge_f16_e32 vcc, src_scc, v2.h    ; encoding: [0xfd,0x04,0x13,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_nge_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_nge_f16 vcc_lo, 0xfe0b, v127.h
+// W32: v_cmp_nge_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x13,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, 0xfe0b, v127.h
+// W64: v_cmp_nge_f16_e32 vcc, 0xfe0b, v127.h   ; encoding: [0xff,0xfe,0x13,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f32 vcc_lo, v1, v2
@@ -8444,124 +8804,164 @@ v_cmp_nge_f64 vcc, 0xaf123456, v[254:255]
 // W64: v_cmp_nge_f64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x53,0x7c,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2
-// W32: v_cmp_ngt_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x16,0x7c]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, v1.l, v2.l    ; encoding: [0x01,0x05,0x16,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v127, v2
-// W32: v_cmp_ngt_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x16,0x7c]
+v_cmp_ngt_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, v127.l, v2.l  ; encoding: [0x7f,0x05,0x16,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, s1, v2
-// W32: v_cmp_ngt_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, s1, v2.l      ; encoding: [0x01,0x04,0x16,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, s105, v2
-// W32: v_cmp_ngt_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, s105, v2.l    ; encoding: [0x69,0x04,0x16,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_ngt_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, vcc_lo, v2.l  ; encoding: [0x6a,0x04,0x16,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_ngt_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, vcc_hi, v2.l  ; encoding: [0x6b,0x04,0x16,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_ngt_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, ttmp15, v2.l  ; encoding: [0x7b,0x04,0x16,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, m0, v2
-// W32: v_cmp_ngt_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, m0, v2.l      ; encoding: [0x7d,0x04,0x16,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_ngt_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, exec_lo, v2.l ; encoding: [0x7e,0x04,0x16,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_ngt_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, exec_hi, v2.l ; encoding: [0x7f,0x04,0x16,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, null, v2
-// W32: v_cmp_ngt_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc_lo, null, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, null, v2.l    ; encoding: [0x7c,0x04,0x16,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, -1, v2
-// W32: v_cmp_ngt_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, -1, v2.l      ; encoding: [0xc1,0x04,0x16,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v2.l     ; encoding: [0xf0,0x04,0x16,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_ngt_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, src_scc, v2.l ; encoding: [0xfd,0x04,0x16,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_ngt_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_ngt_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2
-// W64: v_cmp_ngt_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x16,0x7c]
+v_cmp_ngt_f16 vcc, v1.l, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, v1.l, v2.l       ; encoding: [0x01,0x05,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v127.l, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, v127.l, v2.l     ; encoding: [0x7f,0x05,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, s1, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, s1, v2.l         ; encoding: [0x01,0x04,0x16,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v127, v2
-// W64: v_cmp_ngt_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x16,0x7c]
+v_cmp_ngt_f16 vcc, s105, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, s105, v2.l       ; encoding: [0x69,0x04,0x16,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, s1, v2
-// W64: v_cmp_ngt_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, vcc_lo, v2.l     ; encoding: [0x6a,0x04,0x16,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, s105, v2
-// W64: v_cmp_ngt_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, vcc_hi, v2.l     ; encoding: [0x6b,0x04,0x16,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, vcc_lo, v2
-// W64: v_cmp_ngt_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, ttmp15, v2.l     ; encoding: [0x7b,0x04,0x16,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, vcc_hi, v2
-// W64: v_cmp_ngt_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc, m0, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, m0, v2.l         ; encoding: [0x7d,0x04,0x16,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, ttmp15, v2
-// W64: v_cmp_ngt_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, exec_lo, v2.l    ; encoding: [0x7e,0x04,0x16,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, m0, v2
-// W64: v_cmp_ngt_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, exec_hi, v2.l    ; encoding: [0x7f,0x04,0x16,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, exec_lo, v2
-// W64: v_cmp_ngt_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc, null, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, null, v2.l       ; encoding: [0x7c,0x04,0x16,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, exec_hi, v2
-// W64: v_cmp_ngt_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc, -1, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, -1, v2.l         ; encoding: [0xc1,0x04,0x16,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, null, v2
-// W64: v_cmp_ngt_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc, 0.5, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, 0.5, v2.l        ; encoding: [0xf0,0x04,0x16,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, -1, v2
-// W64: v_cmp_ngt_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc, src_scc, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, src_scc, v2.l    ; encoding: [0xfd,0x04,0x16,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, 0.5, v2
-// W64: v_cmp_ngt_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_ngt_f16_e32 vcc, 0xfe0b, v127.l   ; encoding: [0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1.h, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, v1.h, v2.l    ; encoding: [0x81,0x05,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.h, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, v1.h, v2.l       ; encoding: [0x81,0x05,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v127.h, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, v127.h, v2.l  ; encoding: [0xff,0x05,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v127.h, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, v127.h, v2.l     ; encoding: [0xff,0x05,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, 0.5, v127.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v127.l   ; encoding: [0xf0,0xfe,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, 0.5, v127.l
+// W64: v_cmp_ngt_f16_e32 vcc, 0.5, v127.l      ; encoding: [0xf0,0xfe,0x16,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, src_scc, v2
-// W64: v_cmp_ngt_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc_lo, src_scc, v2.h
+// W32: v_cmp_ngt_f16_e32 vcc_lo, src_scc, v2.h ; encoding: [0xfd,0x04,0x17,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, src_scc, v2.h
+// W64: v_cmp_ngt_f16_e32 vcc, src_scc, v2.h    ; encoding: [0xfd,0x04,0x17,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_ngt_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_ngt_f16 vcc_lo, 0xfe0b, v127.h
+// W32: v_cmp_ngt_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x17,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, 0xfe0b, v127.h
+// W64: v_cmp_ngt_f16_e32 vcc, 0xfe0b, v127.h   ; encoding: [0xff,0xfe,0x17,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f32 vcc_lo, v1, v2
@@ -8780,124 +9180,164 @@ v_cmp_ngt_f64 vcc, 0xaf123456, v[254:255]
 // W64: v_cmp_ngt_f64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x57,0x7c,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2
-// W32: v_cmp_nle_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, v1.l, v2.l    ; encoding: [0x01,0x05,0x18,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v127, v2
-// W32: v_cmp_nle_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, v127.l, v2.l  ; encoding: [0x7f,0x05,0x18,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, s1, v2
-// W32: v_cmp_nle_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, s1, v2.l      ; encoding: [0x01,0x04,0x18,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, s105, v2
-// W32: v_cmp_nle_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, s105, v2.l    ; encoding: [0x69,0x04,0x18,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_nle_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, vcc_lo, v2.l  ; encoding: [0x6a,0x04,0x18,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_nle_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, vcc_hi, v2.l  ; encoding: [0x6b,0x04,0x18,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_nle_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, ttmp15, v2.l  ; encoding: [0x7b,0x04,0x18,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, m0, v2
-// W32: v_cmp_nle_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, m0, v2.l      ; encoding: [0x7d,0x04,0x18,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_nle_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, exec_lo, v2.l ; encoding: [0x7e,0x04,0x18,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_nle_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, exec_hi, v2.l ; encoding: [0x7f,0x04,0x18,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, null, v2
-// W32: v_cmp_nle_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, null, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, null, v2.l    ; encoding: [0x7c,0x04,0x18,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, -1, v2
-// W32: v_cmp_nle_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, -1, v2.l      ; encoding: [0xc1,0x04,0x18,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_nle_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, 0.5, v2.l     ; encoding: [0xf0,0x04,0x18,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_nle_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, src_scc, v2.l ; encoding: [0xfd,0x04,0x18,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_nle_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_nle_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2
-// W64: v_cmp_nle_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x18,0x7c]
+v_cmp_nle_f16 vcc, v1.l, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, v1.l, v2.l       ; encoding: [0x01,0x05,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v127.l, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, v127.l, v2.l     ; encoding: [0x7f,0x05,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, s1, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, s1, v2.l         ; encoding: [0x01,0x04,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, s105, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, s105, v2.l       ; encoding: [0x69,0x04,0x18,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v127, v2
-// W64: v_cmp_nle_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x18,0x7c]
+v_cmp_nle_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, vcc_lo, v2.l     ; encoding: [0x6a,0x04,0x18,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, s1, v2
-// W64: v_cmp_nle_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, vcc_hi, v2.l     ; encoding: [0x6b,0x04,0x18,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, s105, v2
-// W64: v_cmp_nle_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, ttmp15, v2.l     ; encoding: [0x7b,0x04,0x18,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, vcc_lo, v2
-// W64: v_cmp_nle_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc, m0, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, m0, v2.l         ; encoding: [0x7d,0x04,0x18,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, vcc_hi, v2
-// W64: v_cmp_nle_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, exec_lo, v2.l    ; encoding: [0x7e,0x04,0x18,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, ttmp15, v2
-// W64: v_cmp_nle_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, exec_hi, v2.l    ; encoding: [0x7f,0x04,0x18,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, m0, v2
-// W64: v_cmp_nle_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc, null, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, null, v2.l       ; encoding: [0x7c,0x04,0x18,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, exec_lo, v2
-// W64: v_cmp_nle_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc, -1, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, -1, v2.l         ; encoding: [0xc1,0x04,0x18,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, exec_hi, v2
-// W64: v_cmp_nle_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc, 0.5, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, 0.5, v2.l        ; encoding: [0xf0,0x04,0x18,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, null, v2
-// W64: v_cmp_nle_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc, src_scc, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, src_scc, v2.l    ; encoding: [0xfd,0x04,0x18,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, -1, v2
-// W64: v_cmp_nle_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_nle_f16_e32 vcc, 0xfe0b, v127.l   ; encoding: [0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, 0.5, v2
-// W64: v_cmp_nle_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, v1.h, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, v1.h, v2.l    ; encoding: [0x81,0x05,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.h, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, v1.h, v2.l       ; encoding: [0x81,0x05,0x18,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, src_scc, v2
-// W64: v_cmp_nle_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, v127.h, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, v127.h, v2.l  ; encoding: [0xff,0x05,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v127.h, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, v127.h, v2.l     ; encoding: [0xff,0x05,0x18,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_nle_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_nle_f16 vcc_lo, 0.5, v127.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, 0.5, v127.l   ; encoding: [0xf0,0xfe,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, 0.5, v127.l
+// W64: v_cmp_nle_f16_e32 vcc, 0.5, v127.l      ; encoding: [0xf0,0xfe,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, src_scc, v2.h
+// W32: v_cmp_nle_f16_e32 vcc_lo, src_scc, v2.h ; encoding: [0xfd,0x04,0x19,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, src_scc, v2.h
+// W64: v_cmp_nle_f16_e32 vcc, src_scc, v2.h    ; encoding: [0xfd,0x04,0x19,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, 0xfe0b, v127.h
+// W32: v_cmp_nle_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x19,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, 0xfe0b, v127.h
+// W64: v_cmp_nle_f16_e32 vcc, 0xfe0b, v127.h   ; encoding: [0xff,0xfe,0x19,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f32 vcc_lo, v1, v2
@@ -9116,124 +9556,164 @@ v_cmp_nle_f64 vcc, 0xaf123456, v[254:255]
 // W64: v_cmp_nle_f64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x59,0x7c,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2
-// W32: v_cmp_nlg_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, v1.l, v2.l    ; encoding: [0x01,0x05,0x14,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v127, v2
-// W32: v_cmp_nlg_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, v127.l, v2.l  ; encoding: [0x7f,0x05,0x14,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, s1, v2
-// W32: v_cmp_nlg_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, s1, v2.l      ; encoding: [0x01,0x04,0x14,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, s105, v2
-// W32: v_cmp_nlg_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, s105, v2.l    ; encoding: [0x69,0x04,0x14,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_nlg_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, vcc_lo, v2.l  ; encoding: [0x6a,0x04,0x14,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_nlg_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, vcc_hi, v2.l  ; encoding: [0x6b,0x04,0x14,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_nlg_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, ttmp15, v2.l  ; encoding: [0x7b,0x04,0x14,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, m0, v2
-// W32: v_cmp_nlg_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, m0, v2.l      ; encoding: [0x7d,0x04,0x14,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_nlg_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, exec_lo, v2.l ; encoding: [0x7e,0x04,0x14,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_nlg_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, exec_hi, v2.l ; encoding: [0x7f,0x04,0x14,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, null, v2
-// W32: v_cmp_nlg_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, null, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, null, v2.l    ; encoding: [0x7c,0x04,0x14,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, -1, v2
-// W32: v_cmp_nlg_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, -1, v2.l      ; encoding: [0xc1,0x04,0x14,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_nlg_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, 0.5, v2.l     ; encoding: [0xf0,0x04,0x14,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_nlg_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, src_scc, v2.l ; encoding: [0xfd,0x04,0x14,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_nlg_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_nlg_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2
-// W64: v_cmp_nlg_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x14,0x7c]
+v_cmp_nlg_f16 vcc, v1.l, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, v1.l, v2.l       ; encoding: [0x01,0x05,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v127.l, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, v127.l, v2.l     ; encoding: [0x7f,0x05,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, s1, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, s1, v2.l         ; encoding: [0x01,0x04,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, s105, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, s105, v2.l       ; encoding: [0x69,0x04,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, vcc_lo, v2.l     ; encoding: [0x6a,0x04,0x14,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v127, v2
-// W64: v_cmp_nlg_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x14,0x7c]
+v_cmp_nlg_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, vcc_hi, v2.l     ; encoding: [0x6b,0x04,0x14,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, s1, v2
-// W64: v_cmp_nlg_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, ttmp15, v2.l     ; encoding: [0x7b,0x04,0x14,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, s105, v2
-// W64: v_cmp_nlg_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc, m0, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, m0, v2.l         ; encoding: [0x7d,0x04,0x14,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, vcc_lo, v2
-// W64: v_cmp_nlg_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, exec_lo, v2.l    ; encoding: [0x7e,0x04,0x14,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, vcc_hi, v2
-// W64: v_cmp_nlg_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, exec_hi, v2.l    ; encoding: [0x7f,0x04,0x14,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, ttmp15, v2
-// W64: v_cmp_nlg_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc, null, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, null, v2.l       ; encoding: [0x7c,0x04,0x14,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, m0, v2
-// W64: v_cmp_nlg_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc, -1, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, -1, v2.l         ; encoding: [0xc1,0x04,0x14,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, exec_lo, v2
-// W64: v_cmp_nlg_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc, 0.5, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, 0.5, v2.l        ; encoding: [0xf0,0x04,0x14,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, exec_hi, v2
-// W64: v_cmp_nlg_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc, src_scc, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, src_scc, v2.l    ; encoding: [0xfd,0x04,0x14,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, null, v2
-// W64: v_cmp_nlg_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_nlg_f16_e32 vcc, 0xfe0b, v127.l   ; encoding: [0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, -1, v2
-// W64: v_cmp_nlg_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, v1.h, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, v1.h, v2.l    ; encoding: [0x81,0x05,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.h, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, v1.h, v2.l       ; encoding: [0x81,0x05,0x14,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, 0.5, v2
-// W64: v_cmp_nlg_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, v127.h, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, v127.h, v2.l  ; encoding: [0xff,0x05,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v127.h, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, v127.h, v2.l     ; encoding: [0xff,0x05,0x14,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, src_scc, v2
-// W64: v_cmp_nlg_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, 0.5, v127.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, 0.5, v127.l   ; encoding: [0xf0,0xfe,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, 0.5, v127.l
+// W64: v_cmp_nlg_f16_e32 vcc, 0.5, v127.l      ; encoding: [0xf0,0xfe,0x14,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_nlg_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_nlg_f16 vcc_lo, src_scc, v2.h
+// W32: v_cmp_nlg_f16_e32 vcc_lo, src_scc, v2.h ; encoding: [0xfd,0x04,0x15,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, src_scc, v2.h
+// W64: v_cmp_nlg_f16_e32 vcc, src_scc, v2.h    ; encoding: [0xfd,0x04,0x15,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, 0xfe0b, v127.h
+// W32: v_cmp_nlg_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x15,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, 0xfe0b, v127.h
+// W64: v_cmp_nlg_f16_e32 vcc, 0xfe0b, v127.h   ; encoding: [0xff,0xfe,0x15,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f32 vcc_lo, v1, v2
@@ -9452,124 +9932,164 @@ v_cmp_nlg_f64 vcc, 0xaf123456, v[254:255]
 // W64: v_cmp_nlg_f64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x55,0x7c,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2
-// W32: v_cmp_nlt_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x1c,0x7c]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, v1.l, v2.l    ; encoding: [0x01,0x05,0x1c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v127, v2
-// W32: v_cmp_nlt_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x1c,0x7c]
+v_cmp_nlt_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, v127.l, v2.l  ; encoding: [0x7f,0x05,0x1c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, s1, v2
-// W32: v_cmp_nlt_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, s1, v2.l      ; encoding: [0x01,0x04,0x1c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, s105, v2
-// W32: v_cmp_nlt_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, s105, v2.l    ; encoding: [0x69,0x04,0x1c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_nlt_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, vcc_lo, v2.l  ; encoding: [0x6a,0x04,0x1c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_nlt_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, vcc_hi, v2.l  ; encoding: [0x6b,0x04,0x1c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_nlt_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, ttmp15, v2.l  ; encoding: [0x7b,0x04,0x1c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, m0, v2
-// W32: v_cmp_nlt_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, m0, v2.l      ; encoding: [0x7d,0x04,0x1c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_nlt_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, exec_lo, v2.l ; encoding: [0x7e,0x04,0x1c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_nlt_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, exec_hi, v2.l ; encoding: [0x7f,0x04,0x1c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, null, v2
-// W32: v_cmp_nlt_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc_lo, null, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, null, v2.l    ; encoding: [0x7c,0x04,0x1c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, -1, v2
-// W32: v_cmp_nlt_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, -1, v2.l      ; encoding: [0xc1,0x04,0x1c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v2.l     ; encoding: [0xf0,0x04,0x1c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_nlt_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, src_scc, v2.l ; encoding: [0xfd,0x04,0x1c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_nlt_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_nlt_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2
-// W64: v_cmp_nlt_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x1c,0x7c]
+v_cmp_nlt_f16 vcc, v1.l, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, v1.l, v2.l       ; encoding: [0x01,0x05,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v127.l, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, v127.l, v2.l     ; encoding: [0x7f,0x05,0x1c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v127, v2
-// W64: v_cmp_nlt_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x1c,0x7c]
+v_cmp_nlt_f16 vcc, s1, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, s1, v2.l         ; encoding: [0x01,0x04,0x1c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, s1, v2
-// W64: v_cmp_nlt_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc, s105, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, s105, v2.l       ; encoding: [0x69,0x04,0x1c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, s105, v2
-// W64: v_cmp_nlt_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, vcc_lo, v2.l     ; encoding: [0x6a,0x04,0x1c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, vcc_lo, v2
-// W64: v_cmp_nlt_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, vcc_hi, v2.l     ; encoding: [0x6b,0x04,0x1c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, vcc_hi, v2
-// W64: v_cmp_nlt_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, ttmp15, v2.l     ; encoding: [0x7b,0x04,0x1c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, ttmp15, v2
-// W64: v_cmp_nlt_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc, m0, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, m0, v2.l         ; encoding: [0x7d,0x04,0x1c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, m0, v2
-// W64: v_cmp_nlt_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, exec_lo, v2.l    ; encoding: [0x7e,0x04,0x1c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, exec_lo, v2
-// W64: v_cmp_nlt_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, exec_hi, v2.l    ; encoding: [0x7f,0x04,0x1c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, exec_hi, v2
-// W64: v_cmp_nlt_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc, null, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, null, v2.l       ; encoding: [0x7c,0x04,0x1c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, null, v2
-// W64: v_cmp_nlt_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc, -1, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, -1, v2.l         ; encoding: [0xc1,0x04,0x1c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, -1, v2
-// W64: v_cmp_nlt_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc, 0.5, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, 0.5, v2.l        ; encoding: [0xf0,0x04,0x1c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, 0.5, v2
-// W64: v_cmp_nlt_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc, src_scc, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, src_scc, v2.l    ; encoding: [0xfd,0x04,0x1c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, src_scc, v2
-// W64: v_cmp_nlt_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_nlt_f16_e32 vcc, 0xfe0b, v127.l   ; encoding: [0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1.h, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, v1.h, v2.l    ; encoding: [0x81,0x05,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1.h, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, v1.h, v2.l       ; encoding: [0x81,0x05,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v127.h, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, v127.h, v2.l  ; encoding: [0xff,0x05,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v127.h, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, v127.h, v2.l     ; encoding: [0xff,0x05,0x1c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_nlt_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_nlt_f16 vcc_lo, 0.5, v127.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v127.l   ; encoding: [0xf0,0xfe,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, 0.5, v127.l
+// W64: v_cmp_nlt_f16_e32 vcc, 0.5, v127.l      ; encoding: [0xf0,0xfe,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, src_scc, v2.h
+// W32: v_cmp_nlt_f16_e32 vcc_lo, src_scc, v2.h ; encoding: [0xfd,0x04,0x1d,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, src_scc, v2.h
+// W64: v_cmp_nlt_f16_e32 vcc, src_scc, v2.h    ; encoding: [0xfd,0x04,0x1d,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, 0xfe0b, v127.h
+// W32: v_cmp_nlt_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x1d,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, 0xfe0b, v127.h
+// W64: v_cmp_nlt_f16_e32 vcc, 0xfe0b, v127.h   ; encoding: [0xff,0xfe,0x1d,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f32 vcc_lo, v1, v2
@@ -9788,124 +10308,164 @@ v_cmp_nlt_f64 vcc, 0xaf123456, v[254:255]
 // W64: v_cmp_nlt_f64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x5d,0x7c,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2
-// W32: v_cmp_o_f16_e32 vcc_lo, v1, v2          ; encoding: [0x01,0x05,0x0e,0x7c]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, v1.l, v2.l      ; encoding: [0x01,0x05,0x0e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v127, v2
-// W32: v_cmp_o_f16_e32 vcc_lo, v127, v2        ; encoding: [0x7f,0x05,0x0e,0x7c]
+v_cmp_o_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, v127.l, v2.l    ; encoding: [0x7f,0x05,0x0e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, s1, v2
-// W32: v_cmp_o_f16_e32 vcc_lo, s1, v2          ; encoding: [0x01,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, s1, v2.l        ; encoding: [0x01,0x04,0x0e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, s105, v2
-// W32: v_cmp_o_f16_e32 vcc_lo, s105, v2        ; encoding: [0x69,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, s105, v2.l      ; encoding: [0x69,0x04,0x0e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_o_f16_e32 vcc_lo, vcc_lo, v2      ; encoding: [0x6a,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, vcc_lo, v2.l    ; encoding: [0x6a,0x04,0x0e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_o_f16_e32 vcc_lo, vcc_hi, v2      ; encoding: [0x6b,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, vcc_hi, v2.l    ; encoding: [0x6b,0x04,0x0e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_o_f16_e32 vcc_lo, ttmp15, v2      ; encoding: [0x7b,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, ttmp15, v2.l    ; encoding: [0x7b,0x04,0x0e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, m0, v2
-// W32: v_cmp_o_f16_e32 vcc_lo, m0, v2          ; encoding: [0x7d,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, m0, v2.l        ; encoding: [0x7d,0x04,0x0e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_o_f16_e32 vcc_lo, exec_lo, v2     ; encoding: [0x7e,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, exec_lo, v2.l   ; encoding: [0x7e,0x04,0x0e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_o_f16_e32 vcc_lo, exec_hi, v2     ; encoding: [0x7f,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, exec_hi, v2.l   ; encoding: [0x7f,0x04,0x0e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, null, v2
-// W32: v_cmp_o_f16_e32 vcc_lo, null, v2        ; encoding: [0x7c,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc_lo, null, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, null, v2.l      ; encoding: [0x7c,0x04,0x0e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, -1, v2
-// W32: v_cmp_o_f16_e32 vcc_lo, -1, v2          ; encoding: [0xc1,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, -1, v2.l        ; encoding: [0xc1,0x04,0x0e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_o_f16_e32 vcc_lo, 0.5, v2         ; encoding: [0xf0,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, 0.5, v2.l       ; encoding: [0xf0,0x04,0x0e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_o_f16_e32 vcc_lo, src_scc, v2     ; encoding: [0xfd,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, src_scc, v2.l   ; encoding: [0xfd,0x04,0x0e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_o_f16_e32 vcc_lo, 0xfe0b, v127    ; encoding: [0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_o_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_o_f16_e32 vcc_lo, 0xfe0b, v127.l  ; encoding: [0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2
-// W64: v_cmp_o_f16_e32 vcc, v1, v2             ; encoding: [0x01,0x05,0x0e,0x7c]
+v_cmp_o_f16 vcc, v1.l, v2.l
+// W64: v_cmp_o_f16_e32 vcc, v1.l, v2.l         ; encoding: [0x01,0x05,0x0e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v127, v2
-// W64: v_cmp_o_f16_e32 vcc, v127, v2           ; encoding: [0x7f,0x05,0x0e,0x7c]
+v_cmp_o_f16 vcc, v127.l, v2.l
+// W64: v_cmp_o_f16_e32 vcc, v127.l, v2.l       ; encoding: [0x7f,0x05,0x0e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, s1, v2
-// W64: v_cmp_o_f16_e32 vcc, s1, v2             ; encoding: [0x01,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc, s1, v2.l
+// W64: v_cmp_o_f16_e32 vcc, s1, v2.l           ; encoding: [0x01,0x04,0x0e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, s105, v2
-// W64: v_cmp_o_f16_e32 vcc, s105, v2           ; encoding: [0x69,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc, s105, v2.l
+// W64: v_cmp_o_f16_e32 vcc, s105, v2.l         ; encoding: [0x69,0x04,0x0e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, vcc_lo, v2
-// W64: v_cmp_o_f16_e32 vcc, vcc_lo, v2         ; encoding: [0x6a,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_o_f16_e32 vcc, vcc_lo, v2.l       ; encoding: [0x6a,0x04,0x0e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, vcc_hi, v2
-// W64: v_cmp_o_f16_e32 vcc, vcc_hi, v2         ; encoding: [0x6b,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_o_f16_e32 vcc, vcc_hi, v2.l       ; encoding: [0x6b,0x04,0x0e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, ttmp15, v2
-// W64: v_cmp_o_f16_e32 vcc, ttmp15, v2         ; encoding: [0x7b,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_o_f16_e32 vcc, ttmp15, v2.l       ; encoding: [0x7b,0x04,0x0e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, m0, v2
-// W64: v_cmp_o_f16_e32 vcc, m0, v2             ; encoding: [0x7d,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc, m0, v2.l
+// W64: v_cmp_o_f16_e32 vcc, m0, v2.l           ; encoding: [0x7d,0x04,0x0e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, exec_lo, v2
-// W64: v_cmp_o_f16_e32 vcc, exec_lo, v2        ; encoding: [0x7e,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_o_f16_e32 vcc, exec_lo, v2.l      ; encoding: [0x7e,0x04,0x0e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, exec_hi, v2
-// W64: v_cmp_o_f16_e32 vcc, exec_hi, v2        ; encoding: [0x7f,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_o_f16_e32 vcc, exec_hi, v2.l      ; encoding: [0x7f,0x04,0x0e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, null, v2
-// W64: v_cmp_o_f16_e32 vcc, null, v2           ; encoding: [0x7c,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc, null, v2.l
+// W64: v_cmp_o_f16_e32 vcc, null, v2.l         ; encoding: [0x7c,0x04,0x0e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, -1, v2
-// W64: v_cmp_o_f16_e32 vcc, -1, v2             ; encoding: [0xc1,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc, -1, v2.l
+// W64: v_cmp_o_f16_e32 vcc, -1, v2.l           ; encoding: [0xc1,0x04,0x0e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, 0.5, v2
-// W64: v_cmp_o_f16_e32 vcc, 0.5, v2            ; encoding: [0xf0,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc, 0.5, v2.l
+// W64: v_cmp_o_f16_e32 vcc, 0.5, v2.l          ; encoding: [0xf0,0x04,0x0e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, src_scc, v2
-// W64: v_cmp_o_f16_e32 vcc, src_scc, v2        ; encoding: [0xfd,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc, src_scc, v2.l
+// W64: v_cmp_o_f16_e32 vcc, src_scc, v2.l      ; encoding: [0xfd,0x04,0x0e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_o_f16_e32 vcc, 0xfe0b, v127       ; encoding: [0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_o_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_o_f16_e32 vcc, 0xfe0b, v127.l     ; encoding: [0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1.h, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, v1.h, v2.l      ; encoding: [0x81,0x05,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.h, v2.l
+// W64: v_cmp_o_f16_e32 vcc, v1.h, v2.l         ; encoding: [0x81,0x05,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v127.h, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, v127.h, v2.l    ; encoding: [0xff,0x05,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v127.h, v2.l
+// W64: v_cmp_o_f16_e32 vcc, v127.h, v2.l       ; encoding: [0xff,0x05,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, 0.5, v127.l
+// W32: v_cmp_o_f16_e32 vcc_lo, 0.5, v127.l     ; encoding: [0xf0,0xfe,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, 0.5, v127.l
+// W64: v_cmp_o_f16_e32 vcc, 0.5, v127.l        ; encoding: [0xf0,0xfe,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, src_scc, v2.h
+// W32: v_cmp_o_f16_e32 vcc_lo, src_scc, v2.h   ; encoding: [0xfd,0x04,0x0f,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, src_scc, v2.h
+// W64: v_cmp_o_f16_e32 vcc, src_scc, v2.h      ; encoding: [0xfd,0x04,0x0f,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, 0xfe0b, v127.h
+// W32: v_cmp_o_f16_e32 vcc_lo, 0xfe0b, v127.h  ; encoding: [0xff,0xfe,0x0f,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, 0xfe0b, v127.h
+// W64: v_cmp_o_f16_e32 vcc, 0xfe0b, v127.h     ; encoding: [0xff,0xfe,0x0f,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_o_f32 vcc_lo, v1, v2
@@ -10124,124 +10684,204 @@ v_cmp_o_f64 vcc, 0xaf123456, v[254:255]
 // W64: v_cmp_o_f64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x4f,0x7c,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, v1, v2
-// W32: v_cmp_t_f16_e32 vcc_lo, v1, v2          ; encoding: [0x01,0x05,0x1e,0x7c]
+v_cmp_t_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, v1.l, v2.l      ; encoding: [0x01,0x05,0x1e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, v127, v2
-// W32: v_cmp_t_f16_e32 vcc_lo, v127, v2        ; encoding: [0x7f,0x05,0x1e,0x7c]
+v_cmp_t_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, v127.l, v2.l    ; encoding: [0x7f,0x05,0x1e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, s1, v2
-// W32: v_cmp_t_f16_e32 vcc_lo, s1, v2          ; encoding: [0x01,0x04,0x1e,0x7c]
+v_cmp_t_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, s1, v2.l        ; encoding: [0x01,0x04,0x1e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, s105, v2
-// W32: v_cmp_t_f16_e32 vcc_lo, s105, v2        ; encoding: [0x69,0x04,0x1e,0x7c]
+v_cmp_t_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, s105, v2.l      ; encoding: [0x69,0x04,0x1e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_t_f16_e32 vcc_lo, vcc_lo, v2      ; encoding: [0x6a,0x04,0x1e,0x7c]
+v_cmp_t_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, vcc_lo, v2.l    ; encoding: [0x6a,0x04,0x1e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_t_f16_e32 vcc_lo, vcc_hi, v2      ; encoding: [0x6b,0x04,0x1e,0x7c]
+v_cmp_t_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, vcc_hi, v2.l    ; encoding: [0x6b,0x04,0x1e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_t_f16_e32 vcc_lo, ttmp15, v2      ; encoding: [0x7b,0x04,0x1e,0x7c]
+v_cmp_t_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, ttmp15, v2.l    ; encoding: [0x7b,0x04,0x1e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, m0, v2
-// W32: v_cmp_t_f16_e32 vcc_lo, m0, v2          ; encoding: [0x7d,0x04,0x1e,0x7c]
+v_cmp_t_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, m0, v2.l        ; encoding: [0x7d,0x04,0x1e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_t_f16_e32 vcc_lo, exec_lo, v2     ; encoding: [0x7e,0x04,0x1e,0x7c]
+v_cmp_t_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, exec_lo, v2.l   ; encoding: [0x7e,0x04,0x1e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_t_f16_e32 vcc_lo, exec_hi, v2     ; encoding: [0x7f,0x04,0x1e,0x7c]
+v_cmp_t_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, exec_hi, v2.l   ; encoding: [0x7f,0x04,0x1e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, null, v2
-// W32: v_cmp_t_f16_e32 vcc_lo, null, v2        ; encoding: [0x7c,0x04,0x1e,0x7c]
+v_cmp_t_f16 vcc_lo, null, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, null, v2.l      ; encoding: [0x7c,0x04,0x1e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, -1, v2
-// W32: v_cmp_t_f16_e32 vcc_lo, -1, v2          ; encoding: [0xc1,0x04,0x1e,0x7c]
+v_cmp_t_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, -1, v2.l        ; encoding: [0xc1,0x04,0x1e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_t_f16_e32 vcc_lo, 0.5, v2         ; encoding: [0xf0,0x04,0x1e,0x7c]
+v_cmp_t_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, 0.5, v2.l       ; encoding: [0xf0,0x04,0x1e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_t_f16_e32 vcc_lo, src_scc, v2     ; encoding: [0xfd,0x04,0x1e,0x7c]
+v_cmp_t_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, src_scc, v2.l   ; encoding: [0xfd,0x04,0x1e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_t_f16_e32 vcc_lo, 0xfe0b, v127    ; encoding: [0xff,0xfe,0x1e,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_t_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_t_f16_e32 vcc_lo, 0xfe0b, v127.l  ; encoding: [0xff,0xfe,0x1e,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, v1, v2
-// W64: v_cmp_t_f16_e32 vcc, v1, v2             ; encoding: [0x01,0x05,0x1e,0x7c]
+v_cmp_t_f16 vcc, v1.l, v2.l
+// W64: v_cmp_t_f16_e32 vcc, v1.l, v2.l         ; encoding: [0x01,0x05,0x1e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, v127, v2
-// W64: v_cmp_t_f16_e32 vcc, v127, v2           ; encoding: [0x7f,0x05,0x1e,0x7c]
+v_cmp_t_f16 vcc, v127.l, v2.l
+// W64: v_cmp_t_f16_e32 vcc, v127.l, v2.l       ; encoding: [0x7f,0x05,0x1e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, s1, v2
-// W64: v_cmp_t_f16_e32 vcc, s1, v2             ; encoding: [0x01,0x04,0x1e,0x7c]
+v_cmp_t_f16 vcc, s1, v2.l
+// W64: v_cmp_t_f16_e32 vcc, s1, v2.l           ; encoding: [0x01,0x04,0x1e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, s105, v2
-// W64: v_cmp_t_f16_e32 vcc, s105, v2           ; encoding: [0x69,0x04,0x1e,0x7c]
+v_cmp_t_f16 vcc, s105, v2.l
+// W64: v_cmp_t_f16_e32 vcc, s105, v2.l         ; encoding: [0x69,0x04,0x1e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, vcc_lo, v2
-// W64: v_cmp_t_f16_e32 vcc, vcc_lo, v2         ; encoding: [0x6a,0x04,0x1e,0x7c]
+v_cmp_t_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_t_f16_e32 vcc, vcc_lo, v2.l       ; encoding: [0x6a,0x04,0x1e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, vcc_hi, v2
-// W64: v_cmp_t_f16_e32 vcc, vcc_hi, v2         ; encoding: [0x6b,0x04,0x1e,0x7c]
+v_cmp_t_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_t_f16_e32 vcc, vcc_hi, v2.l       ; encoding: [0x6b,0x04,0x1e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, ttmp15, v2
-// W64: v_cmp_t_f16_e32 vcc, ttmp15, v2         ; encoding: [0x7b,0x04,0x1e,0x7c]
+v_cmp_t_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_t_f16_e32 vcc, ttmp15, v2.l       ; encoding: [0x7b,0x04,0x1e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, m0, v2
-// W64: v_cmp_t_f16_e32 vcc, m0, v2             ; encoding: [0x7d,0x04,0x1e,0x7c]
+v_cmp_t_f16 vcc, m0, v2.l
+// W64: v_cmp_t_f16_e32 vcc, m0, v2.l           ; encoding: [0x7d,0x04,0x1e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, exec_lo, v2
-// W64: v_cmp_t_f16_e32 vcc, exec_lo, v2        ; encoding: [0x7e,0x04,0x1e,0x7c]
+v_cmp_t_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_t_f16_e32 vcc, exec_lo, v2.l      ; encoding: [0x7e,0x04,0x1e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, exec_hi, v2
-// W64: v_cmp_t_f16_e32 vcc, exec_hi, v2        ; encoding: [0x7f,0x04,0x1e,0x7c]
+v_cmp_t_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_t_f16_e32 vcc, exec_hi, v2.l      ; encoding: [0x7f,0x04,0x1e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, null, v2
-// W64: v_cmp_t_f16_e32 vcc, null, v2           ; encoding: [0x7c,0x04,0x1e,0x7c]
+v_cmp_t_f16 vcc, null, v2.l
+// W64: v_cmp_t_f16_e32 vcc, null, v2.l         ; encoding: [0x7c,0x04,0x1e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, -1, v2
-// W64: v_cmp_t_f16_e32 vcc, -1, v2             ; encoding: [0xc1,0x04,0x1e,0x7c]
+v_cmp_t_f16 vcc, -1, v2.l
+// W64: v_cmp_t_f16_e32 vcc, -1, v2.l           ; encoding: [0xc1,0x04,0x1e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, 0.5, v2
-// W64: v_cmp_t_f16_e32 vcc, 0.5, v2            ; encoding: [0xf0,0x04,0x1e,0x7c]
+v_cmp_t_f16 vcc, 0.5, v2.l
+// W64: v_cmp_t_f16_e32 vcc, 0.5, v2.l          ; encoding: [0xf0,0x04,0x1e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, src_scc, v2
-// W64: v_cmp_t_f16_e32 vcc, src_scc, v2        ; encoding: [0xfd,0x04,0x1e,0x7c]
+v_cmp_t_f16 vcc, src_scc, v2.l
+// W64: v_cmp_t_f16_e32 vcc, src_scc, v2.l      ; encoding: [0xfd,0x04,0x1e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_t_f16_e32 vcc, 0xfe0b, v127       ; encoding: [0xff,0xfe,0x1e,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_t_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_t_f16_e32 vcc, 0xfe0b, v127.l     ; encoding: [0xff,0xfe,0x1e,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v1.h, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, v1.h, v2.l      ; encoding: [0x81,0x05,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v1.h, v2.l
+// W64: v_cmp_t_f16_e32 vcc, v1.h, v2.l         ; encoding: [0x81,0x05,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, v1.h, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, v1.h, v2.l      ; encoding: [0x81,0x05,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, v1.h, v2.l
+// W64: v_cmp_t_f16_e32 vcc, v1.h, v2.l         ; encoding: [0x81,0x05,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v127.h, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, v127.h, v2.l    ; encoding: [0xff,0x05,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v127.h, v2.l
+// W64: v_cmp_t_f16_e32 vcc, v127.h, v2.l       ; encoding: [0xff,0x05,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, v127.h, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, v127.h, v2.l    ; encoding: [0xff,0x05,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, v127.h, v2.l
+// W64: v_cmp_t_f16_e32 vcc, v127.h, v2.l       ; encoding: [0xff,0x05,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, 0.5, v127.l
+// W32: v_cmp_t_f16_e32 vcc_lo, 0.5, v127.l     ; encoding: [0xf0,0xfe,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, 0.5, v127.l
+// W64: v_cmp_t_f16_e32 vcc, 0.5, v127.l        ; encoding: [0xf0,0xfe,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, 0.5, v127.l
+// W32: v_cmp_t_f16_e32 vcc_lo, 0.5, v127.l     ; encoding: [0xf0,0xfe,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, 0.5, v127.l
+// W64: v_cmp_t_f16_e32 vcc, 0.5, v127.l        ; encoding: [0xf0,0xfe,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, src_scc, v2.h
+// W32: v_cmp_t_f16_e32 vcc_lo, src_scc, v2.h   ; encoding: [0xfd,0x04,0x1f,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, src_scc, v2.h
+// W64: v_cmp_t_f16_e32 vcc, src_scc, v2.h      ; encoding: [0xfd,0x04,0x1f,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, src_scc, v2.h
+// W32: v_cmp_t_f16_e32 vcc_lo, src_scc, v2.h   ; encoding: [0xfd,0x04,0x1f,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, src_scc, v2.h
+// W64: v_cmp_t_f16_e32 vcc, src_scc, v2.h      ; encoding: [0xfd,0x04,0x1f,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, 0xfe0b, v127.h
+// W32: v_cmp_t_f16_e32 vcc_lo, 0xfe0b, v127.h  ; encoding: [0xff,0xfe,0x1f,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, 0xfe0b, v127.h
+// W64: v_cmp_t_f16_e32 vcc, 0xfe0b, v127.h     ; encoding: [0xff,0xfe,0x1f,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, 0xfe0b, v127.h
+// W32: v_cmp_t_f16_e32 vcc_lo, 0xfe0b, v127.h  ; encoding: [0xff,0xfe,0x1f,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, 0xfe0b, v127.h
+// W64: v_cmp_t_f16_e32 vcc, 0xfe0b, v127.h     ; encoding: [0xff,0xfe,0x1f,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_t_f32 vcc_lo, v1, v2
@@ -10892,52 +11532,52 @@ v_cmp_t_u64 vcc, 0xaf123456, v[254:255]
 // W64: v_cmp_t_u64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xbf,0x7c,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc_lo, v1, v2
-// W32: v_cmp_t_f16_e32 vcc_lo, v1, v2          ; encoding: [0x01,0x05,0x1e,0x7c]
+v_cmp_tru_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, v1.l, v2.l      ; encoding: [0x01,0x05,0x1e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc_lo, v127, v2
-// W32: v_cmp_t_f16_e32 vcc_lo, v127, v2        ; encoding: [0x7f,0x05,0x1e,0x7c]
+v_cmp_tru_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, v127.l, v2.l    ; encoding: [0x7f,0x05,0x1e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc_lo, s1, v2
-// W32: v_cmp_t_f16_e32 vcc_lo, s1, v2          ; encoding: [0x01,0x04,0x1e,0x7c]
+v_cmp_tru_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, s1, v2.l        ; encoding: [0x01,0x04,0x1e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc_lo, s105, v2
-// W32: v_cmp_t_f16_e32 vcc_lo, s105, v2        ; encoding: [0x69,0x04,0x1e,0x7c]
+v_cmp_tru_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, s105, v2.l      ; encoding: [0x69,0x04,0x1e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_t_f16_e32 vcc_lo, vcc_lo, v2      ; encoding: [0x6a,0x04,0x1e,0x7c]
+v_cmp_tru_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, vcc_lo, v2.l    ; encoding: [0x6a,0x04,0x1e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_t_f16_e32 vcc_lo, vcc_hi, v2      ; encoding: [0x6b,0x04,0x1e,0x7c]
+v_cmp_tru_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, vcc_hi, v2.l    ; encoding: [0x6b,0x04,0x1e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_t_f16_e32 vcc_lo, ttmp15, v2      ; encoding: [0x7b,0x04,0x1e,0x7c]
+v_cmp_tru_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, ttmp15, v2.l    ; encoding: [0x7b,0x04,0x1e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc_lo, m0, v2
-// W32: v_cmp_t_f16_e32 vcc_lo, m0, v2          ; encoding: [0x7d,0x04,0x1e,0x7c]
+v_cmp_tru_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, m0, v2.l        ; encoding: [0x7d,0x04,0x1e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_t_f16_e32 vcc_lo, exec_lo, v2     ; encoding: [0x7e,0x04,0x1e,0x7c]
+v_cmp_tru_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, exec_lo, v2.l   ; encoding: [0x7e,0x04,0x1e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_t_f16_e32 vcc_lo, exec_hi, v2     ; encoding: [0x7f,0x04,0x1e,0x7c]
+v_cmp_tru_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, exec_hi, v2.l   ; encoding: [0x7f,0x04,0x1e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc_lo, null, v2
-// W32: v_cmp_t_f16_e32 vcc_lo, null, v2        ; encoding: [0x7c,0x04,0x1e,0x7c]
+v_cmp_tru_f16 vcc_lo, null, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, null, v2.l      ; encoding: [0x7c,0x04,0x1e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc_lo, -1, v2
-// W32: v_cmp_t_f16_e32 vcc_lo, -1, v2          ; encoding: [0xc1,0x04,0x1e,0x7c]
+v_cmp_tru_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_t_f16_e32 vcc_lo, -1, v2.l        ; encoding: [0xc1,0x04,0x1e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f16 vcc_lo, 0.5, v2
@@ -10952,52 +11592,52 @@ v_cmp_tru_f16 vcc_lo, 0xfe0b, v127
 // W32: v_cmp_t_f16_e32 vcc_lo, 0xfe0b, v127    ; encoding: [0xff,0xfe,0x1e,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc, v1, v2
-// W64: v_cmp_t_f16_e32 vcc, v1, v2             ; encoding: [0x01,0x05,0x1e,0x7c]
+v_cmp_tru_f16 vcc, v1.l, v2.l
+// W64: v_cmp_t_f16_e32 vcc, v1.l, v2.l         ; encoding: [0x01,0x05,0x1e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc, v127, v2
-// W64: v_cmp_t_f16_e32 vcc, v127, v2           ; encoding: [0x7f,0x05,0x1e,0x7c]
+v_cmp_tru_f16 vcc, v127.l, v2.l
+// W64: v_cmp_t_f16_e32 vcc, v127.l, v2.l       ; encoding: [0x7f,0x05,0x1e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc, s1, v2
-// W64: v_cmp_t_f16_e32 vcc, s1, v2             ; encoding: [0x01,0x04,0x1e,0x7c]
+v_cmp_tru_f16 vcc, s1, v2.l
+// W64: v_cmp_t_f16_e32 vcc, s1, v2.l           ; encoding: [0x01,0x04,0x1e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc, s105, v2
-// W64: v_cmp_t_f16_e32 vcc, s105, v2           ; encoding: [0x69,0x04,0x1e,0x7c]
+v_cmp_tru_f16 vcc, s105, v2.l
+// W64: v_cmp_t_f16_e32 vcc, s105, v2.l         ; encoding: [0x69,0x04,0x1e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc, vcc_lo, v2
-// W64: v_cmp_t_f16_e32 vcc, vcc_lo, v2         ; encoding: [0x6a,0x04,0x1e,0x7c]
+v_cmp_tru_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_t_f16_e32 vcc, vcc_lo, v2.l       ; encoding: [0x6a,0x04,0x1e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc, vcc_hi, v2
-// W64: v_cmp_t_f16_e32 vcc, vcc_hi, v2         ; encoding: [0x6b,0x04,0x1e,0x7c]
+v_cmp_tru_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_t_f16_e32 vcc, vcc_hi, v2.l       ; encoding: [0x6b,0x04,0x1e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc, ttmp15, v2
-// W64: v_cmp_t_f16_e32 vcc, ttmp15, v2         ; encoding: [0x7b,0x04,0x1e,0x7c]
+v_cmp_tru_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_t_f16_e32 vcc, ttmp15, v2.l       ; encoding: [0x7b,0x04,0x1e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc, m0, v2
-// W64: v_cmp_t_f16_e32 vcc, m0, v2             ; encoding: [0x7d,0x04,0x1e,0x7c]
+v_cmp_tru_f16 vcc, m0, v2.l
+// W64: v_cmp_t_f16_e32 vcc, m0, v2.l           ; encoding: [0x7d,0x04,0x1e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc, exec_lo, v2
-// W64: v_cmp_t_f16_e32 vcc, exec_lo, v2        ; encoding: [0x7e,0x04,0x1e,0x7c]
+v_cmp_tru_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_t_f16_e32 vcc, exec_lo, v2.l      ; encoding: [0x7e,0x04,0x1e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc, exec_hi, v2
-// W64: v_cmp_t_f16_e32 vcc, exec_hi, v2        ; encoding: [0x7f,0x04,0x1e,0x7c]
+v_cmp_tru_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_t_f16_e32 vcc, exec_hi, v2.l      ; encoding: [0x7f,0x04,0x1e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc, null, v2
-// W64: v_cmp_t_f16_e32 vcc, null, v2           ; encoding: [0x7c,0x04,0x1e,0x7c]
+v_cmp_tru_f16 vcc, null, v2.l
+// W64: v_cmp_t_f16_e32 vcc, null, v2.l         ; encoding: [0x7c,0x04,0x1e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc, -1, v2
-// W64: v_cmp_t_f16_e32 vcc, -1, v2             ; encoding: [0xc1,0x04,0x1e,0x7c]
+v_cmp_tru_f16 vcc, -1, v2.l
+// W64: v_cmp_t_f16_e32 vcc, -1, v2.l           ; encoding: [0xc1,0x04,0x1e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f16 vcc, 0.5, v2
@@ -11228,124 +11868,164 @@ v_cmp_tru_f64 vcc, 0xaf123456, v[254:255]
 // W64: v_cmp_t_f64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x5f,0x7c,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2
-// W32: v_cmp_u_f16_e32 vcc_lo, v1, v2          ; encoding: [0x01,0x05,0x10,0x7c]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, v1.l, v2.l      ; encoding: [0x01,0x05,0x10,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v127, v2
-// W32: v_cmp_u_f16_e32 vcc_lo, v127, v2        ; encoding: [0x7f,0x05,0x10,0x7c]
+v_cmp_u_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, v127.l, v2.l    ; encoding: [0x7f,0x05,0x10,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, s1, v2
-// W32: v_cmp_u_f16_e32 vcc_lo, s1, v2          ; encoding: [0x01,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, s1, v2.l        ; encoding: [0x01,0x04,0x10,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, s105, v2
-// W32: v_cmp_u_f16_e32 vcc_lo, s105, v2        ; encoding: [0x69,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, s105, v2.l      ; encoding: [0x69,0x04,0x10,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_u_f16_e32 vcc_lo, vcc_lo, v2      ; encoding: [0x6a,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, vcc_lo, v2.l    ; encoding: [0x6a,0x04,0x10,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_u_f16_e32 vcc_lo, vcc_hi, v2      ; encoding: [0x6b,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, vcc_hi, v2.l    ; encoding: [0x6b,0x04,0x10,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_u_f16_e32 vcc_lo, ttmp15, v2      ; encoding: [0x7b,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, ttmp15, v2.l    ; encoding: [0x7b,0x04,0x10,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, m0, v2
-// W32: v_cmp_u_f16_e32 vcc_lo, m0, v2          ; encoding: [0x7d,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, m0, v2.l        ; encoding: [0x7d,0x04,0x10,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_u_f16_e32 vcc_lo, exec_lo, v2     ; encoding: [0x7e,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, exec_lo, v2.l   ; encoding: [0x7e,0x04,0x10,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_u_f16_e32 vcc_lo, exec_hi, v2     ; encoding: [0x7f,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, exec_hi, v2.l   ; encoding: [0x7f,0x04,0x10,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, null, v2
-// W32: v_cmp_u_f16_e32 vcc_lo, null, v2        ; encoding: [0x7c,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc_lo, null, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, null, v2.l      ; encoding: [0x7c,0x04,0x10,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, -1, v2
-// W32: v_cmp_u_f16_e32 vcc_lo, -1, v2          ; encoding: [0xc1,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, -1, v2.l        ; encoding: [0xc1,0x04,0x10,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_u_f16_e32 vcc_lo, 0.5, v2         ; encoding: [0xf0,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, 0.5, v2.l       ; encoding: [0xf0,0x04,0x10,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_u_f16_e32 vcc_lo, src_scc, v2     ; encoding: [0xfd,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, src_scc, v2.l   ; encoding: [0xfd,0x04,0x10,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_u_f16_e32 vcc_lo, 0xfe0b, v127    ; encoding: [0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_u_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_u_f16_e32 vcc_lo, 0xfe0b, v127.l  ; encoding: [0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2
-// W64: v_cmp_u_f16_e32 vcc, v1, v2             ; encoding: [0x01,0x05,0x10,0x7c]
+v_cmp_u_f16 vcc, v1.l, v2.l
+// W64: v_cmp_u_f16_e32 vcc, v1.l, v2.l         ; encoding: [0x01,0x05,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v127.l, v2.l
+// W64: v_cmp_u_f16_e32 vcc, v127.l, v2.l       ; encoding: [0x7f,0x05,0x10,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v127, v2
-// W64: v_cmp_u_f16_e32 vcc, v127, v2           ; encoding: [0x7f,0x05,0x10,0x7c]
+v_cmp_u_f16 vcc, s1, v2.l
+// W64: v_cmp_u_f16_e32 vcc, s1, v2.l           ; encoding: [0x01,0x04,0x10,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, s1, v2
-// W64: v_cmp_u_f16_e32 vcc, s1, v2             ; encoding: [0x01,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc, s105, v2.l
+// W64: v_cmp_u_f16_e32 vcc, s105, v2.l         ; encoding: [0x69,0x04,0x10,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, s105, v2
-// W64: v_cmp_u_f16_e32 vcc, s105, v2           ; encoding: [0x69,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_u_f16_e32 vcc, vcc_lo, v2.l       ; encoding: [0x6a,0x04,0x10,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, vcc_lo, v2
-// W64: v_cmp_u_f16_e32 vcc, vcc_lo, v2         ; encoding: [0x6a,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_u_f16_e32 vcc, vcc_hi, v2.l       ; encoding: [0x6b,0x04,0x10,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, vcc_hi, v2
-// W64: v_cmp_u_f16_e32 vcc, vcc_hi, v2         ; encoding: [0x6b,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_u_f16_e32 vcc, ttmp15, v2.l       ; encoding: [0x7b,0x04,0x10,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, ttmp15, v2
-// W64: v_cmp_u_f16_e32 vcc, ttmp15, v2         ; encoding: [0x7b,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc, m0, v2.l
+// W64: v_cmp_u_f16_e32 vcc, m0, v2.l           ; encoding: [0x7d,0x04,0x10,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, m0, v2
-// W64: v_cmp_u_f16_e32 vcc, m0, v2             ; encoding: [0x7d,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_u_f16_e32 vcc, exec_lo, v2.l      ; encoding: [0x7e,0x04,0x10,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, exec_lo, v2
-// W64: v_cmp_u_f16_e32 vcc, exec_lo, v2        ; encoding: [0x7e,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_u_f16_e32 vcc, exec_hi, v2.l      ; encoding: [0x7f,0x04,0x10,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, exec_hi, v2
-// W64: v_cmp_u_f16_e32 vcc, exec_hi, v2        ; encoding: [0x7f,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc, null, v2.l
+// W64: v_cmp_u_f16_e32 vcc, null, v2.l         ; encoding: [0x7c,0x04,0x10,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, null, v2
-// W64: v_cmp_u_f16_e32 vcc, null, v2           ; encoding: [0x7c,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc, -1, v2.l
+// W64: v_cmp_u_f16_e32 vcc, -1, v2.l           ; encoding: [0xc1,0x04,0x10,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, -1, v2
-// W64: v_cmp_u_f16_e32 vcc, -1, v2             ; encoding: [0xc1,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc, 0.5, v2.l
+// W64: v_cmp_u_f16_e32 vcc, 0.5, v2.l          ; encoding: [0xf0,0x04,0x10,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, 0.5, v2
-// W64: v_cmp_u_f16_e32 vcc, 0.5, v2            ; encoding: [0xf0,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc, src_scc, v2.l
+// W64: v_cmp_u_f16_e32 vcc, src_scc, v2.l      ; encoding: [0xfd,0x04,0x10,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, src_scc, v2
-// W64: v_cmp_u_f16_e32 vcc, src_scc, v2        ; encoding: [0xfd,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_u_f16_e32 vcc, 0xfe0b, v127.l     ; encoding: [0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1.h, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, v1.h, v2.l      ; encoding: [0x81,0x05,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.h, v2.l
+// W64: v_cmp_u_f16_e32 vcc, v1.h, v2.l         ; encoding: [0x81,0x05,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v127.h, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, v127.h, v2.l    ; encoding: [0xff,0x05,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v127.h, v2.l
+// W64: v_cmp_u_f16_e32 vcc, v127.h, v2.l       ; encoding: [0xff,0x05,0x10,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_u_f16_e32 vcc, 0xfe0b, v127       ; encoding: [0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_u_f16 vcc_lo, 0.5, v127.l
+// W32: v_cmp_u_f16_e32 vcc_lo, 0.5, v127.l     ; encoding: [0xf0,0xfe,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, 0.5, v127.l
+// W64: v_cmp_u_f16_e32 vcc, 0.5, v127.l        ; encoding: [0xf0,0xfe,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, src_scc, v2.h
+// W32: v_cmp_u_f16_e32 vcc_lo, src_scc, v2.h   ; encoding: [0xfd,0x04,0x11,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, src_scc, v2.h
+// W64: v_cmp_u_f16_e32 vcc, src_scc, v2.h      ; encoding: [0xfd,0x04,0x11,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, 0xfe0b, v127.h
+// W32: v_cmp_u_f16_e32 vcc_lo, 0xfe0b, v127.h  ; encoding: [0xff,0xfe,0x11,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, 0xfe0b, v127.h
+// W64: v_cmp_u_f16_e32 vcc, 0xfe0b, v127.h     ; encoding: [0xff,0xfe,0x11,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_u_f32 vcc_lo, v1, v2
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp16.s
index b0d20c4a42fe1..34508e5a9e268 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp16.s
@@ -4,116 +4,220 @@
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
 
-v_cmp_class_f16_dpp vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_class_f16 vcc_lo, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x35,0x30]
+v_cmp_class_f16 vcc_lo, -|v127.l|, v127.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_class_f16 vcc_lo, -|v127.l|, v127.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x35,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_class_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_class_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_class_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_class_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_class_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_class_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_class_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_class_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_class_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_class_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_class_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_class_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_class_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_class_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_class_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_class_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_class_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_class_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_class_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_class_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_class_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_class_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_class_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_class_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_class_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_class_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_class_f16 vcc, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x35,0x30]
+v_cmp_class_f16 vcc, -|v127.l|, v127.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_class_f16 vcc, -|v127.l|, v127.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x35,0x30]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_class_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_class_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_class_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0xfb,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_class_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0xfb,0x7c,0x81,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, -|v127.h|, v127.h row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_class_f16 vcc_lo, -|v127.h|, v127.h row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfb,0x7c,0xff,0x6f,0x35,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, -|v127.h|, v127.h row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_class_f16 vcc, -|v127.h|, v127.h row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfb,0x7c,0xff,0x6f,0x35,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_class_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
@@ -228,116 +332,220 @@ v_cmp_class_f32 vcc, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound
 // W64: v_cmp_class_f32 vcc, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfd,0x7c,0xff,0x6f,0x35,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_eq_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_eq_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_eq_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xf5,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_eq_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_eq_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_eq_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_eq_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_eq_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_eq_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_eq_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_eq_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_eq_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_eq_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_eq_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_eq_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_eq_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_eq_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_eq_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_eq_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_eq_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_eq_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_eq_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_eq_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_eq_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_eq_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_eq_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_eq_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_eq_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_eq_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_eq_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x05,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_eq_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x05,0x7c,0x81,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_eq_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_eq_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_eq_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x05,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_eq_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x05,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
@@ -1108,116 +1316,220 @@ v_cmp_eq_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:
 // W64: v_cmp_eq_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x95,0x7c,0xff,0x6f,0x05,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_f_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_f_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_f_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_f_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_f_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_f_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_f_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_f_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_f_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_f_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_f_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_f_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_f_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_f_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_f_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_f_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_f_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_f_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_f_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_f_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_f_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_f_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_f_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_f_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_f_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_f_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_f_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_f_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_f_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x00,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_f_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_f_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x00,0x7c,0x7f,0x6f,0xf5,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_f_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_f_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_f_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_f_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_f_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_f_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_f_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_f_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_f_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_f_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_f_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_f_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_f_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_f_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_f_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_f_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_f_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_f_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x00,0x7c,0x7f,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_f_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_f_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_f_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_f_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_f_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_f_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_f_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_f_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_f_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_f_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_f_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_f_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_f_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_f_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_f_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_f_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_f_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_f_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_f_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_f_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_f_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_f_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_f_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_f_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_f_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_f_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_f_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_f_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_f_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_f_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_f_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_f_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_f_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x00,0x7c,0x7f,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_f_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x00,0x7c,0x7f,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_f_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_f_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_f_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x01,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_f_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x01,0x7c,0x81,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_f_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x00,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_f_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_f_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x01,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_f_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x01,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_f_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
@@ -1556,116 +1868,220 @@ v_cmp_f_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0
 // W64: v_cmp_f_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x91,0x7c,0xff,0x6f,0x05,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_ge_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_ge_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_ge_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xf5,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_ge_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_ge_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_ge_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_ge_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_ge_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_ge_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_ge_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_ge_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_ge_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_ge_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_ge_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_ge_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_ge_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_ge_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_ge_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_ge_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_ge_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_ge_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_ge_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_ge_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_ge_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_ge_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_ge_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_ge_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_ge_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0d,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_ge_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0d,0x7c,0x81,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_ge_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_ge_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_ge_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0d,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_ge_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0d,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
@@ -2436,180 +2852,284 @@ v_cmp_ge_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:
 // W64: v_cmp_ge_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x9d,0x7c,0xff,0x6f,0x05,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_gt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_gt_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_gt_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xf5,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_gt_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_gt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_gt_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_gt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_gt_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_gt_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_gt_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_gt_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_gt_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_gt_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_gt_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_gt_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_gt_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_gt_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_gt_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_gt_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_gt_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_gt_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_gt_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_gt_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_gt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_gt_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_gt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_gt_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_gt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_gt_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_gt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_gt_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_gt_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_gt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_gt_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0xe4,0x00,0xff]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_gt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f32 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_gt_f32 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f32 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_gt_f32 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0x41,0x01,0xff]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_gt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f32 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_gt_f32 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f32 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_gt_f32 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0x0f,0x01,0xff]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_gt_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f32 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_gt_f32 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f32 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_gt_f32 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0x1f,0x01,0xff]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_gt_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f32 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_gt_f32 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f32 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_gt_f32 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0x2f,0x01,0xff]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_gt_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_gt_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_gt_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0x5f,0x01,0x01]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_gt_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_gt_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_gt_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x29,0x7c,0xff,0x6f,0xf5,0x30]
+v_cmp_gt_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_gt_f32 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_gt_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_gt_f32 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_gt_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_gt_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_gt_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x09,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_gt_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x09,0x7c,0x81,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_gt_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x09,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_gt_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x09,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_gt_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_mirror
+// W32: v_cmp_gt_f32 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: v_cmp_gt_f32 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_shl:1
+// W32: v_cmp_gt_f32 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_shl:15
+// W32: v_cmp_gt_f32 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_shr:1
+// W32: v_cmp_gt_f32 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_shr:15
+// W32: v_cmp_gt_f32 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_ror:1
+// W32: v_cmp_gt_f32 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_ror:15
+// W32: v_cmp_gt_f32 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_gt_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_gt_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_gt_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x29,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_f32 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_gt_f32 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_f32 vcc, v1, v2 row_mirror
@@ -3316,116 +3836,220 @@ v_cmp_gt_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:
 // W64: v_cmp_gt_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x99,0x7c,0xff,0x6f,0x05,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_le_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_le_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_le_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xf5,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_le_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_le_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_le_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_le_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_le_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_le_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_le_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_le_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_le_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_le_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_le_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_le_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_le_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_le_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_le_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_le_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_le_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_le_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_le_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_le_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_le_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_le_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_le_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_le_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_le_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x07,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_le_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x07,0x7c,0x81,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_le_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_le_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_le_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x07,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_le_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x07,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
@@ -4196,116 +4820,220 @@ v_cmp_le_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:
 // W64: v_cmp_le_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x97,0x7c,0xff,0x6f,0x05,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_lg_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_lg_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_lg_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xf5,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_lg_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_lg_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_lg_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_lg_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_lg_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_lg_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_lg_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_lg_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_lg_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_lg_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_lg_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_lg_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_lg_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_lg_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_lg_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_lg_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_lg_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_lg_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_lg_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_lg_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_lg_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0b,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_lg_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0b,0x7c,0x81,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_lg_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_lg_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_lg_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0b,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_lg_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0b,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lg_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
@@ -6060,116 +6788,220 @@ v_cmp_ne_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:
 // W64: v_cmp_ne_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x9b,0x7c,0xff,0x6f,0x05,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_neq_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_neq_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_neq_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xf5,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_neq_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_neq_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_neq_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_neq_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_neq_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_neq_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_neq_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_neq_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_neq_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_neq_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_neq_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_neq_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_neq_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_neq_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_neq_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_neq_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_neq_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_neq_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_neq_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_neq_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_neq_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_neq_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_neq_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_neq_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_neq_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_neq_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_neq_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_neq_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_neq_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1b,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_neq_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1b,0x7c,0x81,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_neq_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_neq_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_neq_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1b,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_neq_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1b,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
@@ -6284,144 +7116,248 @@ v_cmp_neq_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 boun
 // W64: v_cmp_neq_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x3b,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_nge_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_nge_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_nge_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xf5,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_nge_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_nge_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_nge_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_nge_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_nge_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_nge_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_nge_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_nge_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_nge_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_nge_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_nge_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_nge_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_nge_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_nge_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_nge_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_nge_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_nge_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_nge_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_nge_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_nge_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_nge_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_nge_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_nge_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_nge_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_nge_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_nge_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_nge_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_nge_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_nge_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_nge_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x32,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_nge_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x32,0x7c,0x01,0xe4,0x00,0xff]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f32 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_nge_f32 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x32,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f32 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_nge_f32 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x32,0x7c,0x01,0x41,0x01,0xff]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f32 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_nge_f32 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x32,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f32 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_nge_f32 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x32,0x7c,0x01,0x0f,0x01,0xff]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f32 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_nge_f32 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x32,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nge_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_nge_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nge_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x13,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nge_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x13,0x7c,0x81,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_nge_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x13,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_nge_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x13,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_nge_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x32,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_nge_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x32,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 row_mirror
+// W32: v_cmp_nge_f32 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x32,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: v_cmp_nge_f32 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x32,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 row_shl:1
+// W32: v_cmp_nge_f32 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x32,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 row_shl:15
+// W32: v_cmp_nge_f32 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x32,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 row_shr:1
+// W32: v_cmp_nge_f32 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x32,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f32 vcc_lo, v1, v2 row_shr:15
@@ -6508,116 +7444,220 @@ v_cmp_nge_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 boun
 // W64: v_cmp_nge_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x33,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_ngt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_ngt_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_ngt_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xf5,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_ngt_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_ngt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_ngt_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_ngt_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_ngt_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_ngt_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_ngt_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_ngt_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_ngt_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_ngt_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_ngt_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_ngt_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_ngt_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_ngt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_ngt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_ngt_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_ngt_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_ngt_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_ngt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_ngt_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_ngt_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x17,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_ngt_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x17,0x7c,0x81,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_ngt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_ngt_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_ngt_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x17,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_ngt_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x17,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
@@ -6732,116 +7772,220 @@ v_cmp_ngt_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 boun
 // W64: v_cmp_ngt_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x37,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_nle_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_nle_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_nle_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xf5,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_nle_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_nle_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_nle_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_nle_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_nle_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_nle_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_nle_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_nle_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_nle_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_nle_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_nle_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_nle_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_nle_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_nle_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_nle_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_nle_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_nle_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nle_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_nle_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_nle_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_nle_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nle_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x19,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nle_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x19,0x7c,0x81,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_nle_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_nle_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_nle_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x19,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_nle_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x19,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
@@ -6956,116 +8100,220 @@ v_cmp_nle_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 boun
 // W64: v_cmp_nle_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x39,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_nlg_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_nlg_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_nlg_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xf5,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_nlg_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_nlg_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_nlg_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_nlg_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_nlg_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_nlg_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_nlg_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_nlg_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_nlg_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_nlg_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_nlg_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_nlg_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_nlg_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_nlg_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_nlg_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nlg_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_nlg_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_nlg_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_nlg_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nlg_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x15,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nlg_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x15,0x7c,0x81,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_nlg_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_nlg_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_nlg_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x15,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_nlg_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x15,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
@@ -7180,116 +8428,220 @@ v_cmp_nlg_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 boun
 // W64: v_cmp_nlg_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x35,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_nlt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_nlt_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_nlt_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xf5,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_nlt_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_nlt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_nlt_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_nlt_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_nlt_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_nlt_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_nlt_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_nlt_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_nlt_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_nlt_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_nlt_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_nlt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_nlt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_nlt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_nlt_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_nlt_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_nlt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nlt_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_nlt_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nlt_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1d,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nlt_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1d,0x7c,0x81,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_nlt_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1d,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_nlt_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1d,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
@@ -7404,116 +8756,220 @@ v_cmp_nlt_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 boun
 // W64: v_cmp_nlt_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x3d,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_o_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_o_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_o_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xf5,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_o_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_o_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_o_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_o_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_o_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_o_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_o_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_o_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_o_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_o_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_o_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_o_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_o_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_o_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_o_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_o_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_o_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_o_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_o_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_o_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_o_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_o_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_o_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_o_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_o_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_o_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_o_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_o_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_o_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0f,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_o_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0f,0x7c,0x81,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_o_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_o_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_o_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0f,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_o_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0f,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_o_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
@@ -7628,116 +9084,244 @@ v_cmp_o_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_
 // W64: v_cmp_o_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x2f,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_t_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_t_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_t_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_t_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_t_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_t_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_t_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_t_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_t_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_t_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_t_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_t_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_t_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_t_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_t_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_t_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_t_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_t_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_t_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_t_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_t_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_t_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_t_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_t_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_t_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_t_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_t_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1e,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_t_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_t_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1e,0x7c,0x7f,0x6f,0xf5,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_t_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_t_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_t_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_t_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_t_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_t_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_t_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_t_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_t_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_t_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_t_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_t_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_t_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_t_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_t_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_t_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_t_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_t_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_t_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_t_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_t_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_t_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_t_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_t_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_t_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_t_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_t_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1e,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_t_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_t_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1e,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_t_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x1e,0x7c,0x7f,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_t_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x1e,0x7c,0x7f,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_t_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x1e,0x7c,0x7f,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_t_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x1e,0x7c,0x7f,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_t_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1f,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_t_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1f,0x7c,0x81,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_t_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1f,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_t_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1f,0x7c,0x81,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_t_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1f,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_t_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1f,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_t_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1f,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_t_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1f,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_t_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
@@ -8076,48 +9660,48 @@ v_cmp_t_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0
 // W64: v_cmp_t_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x9f,0x7c,0xff,0x6f,0x05,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_t_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_tru_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_t_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_tru_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_t_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_tru_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_t_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_tru_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_t_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_tru_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_t_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_tru_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_t_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_tru_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_t_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_tru_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_t_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_tru_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_t_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_tru_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_t_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_tru_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
@@ -8132,48 +9716,48 @@ v_cmp_tru_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 b
 // W32: v_cmp_t_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1e,0x7c,0x7f,0x6f,0xf5,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_t_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_tru_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_t_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_tru_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_t_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_tru_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_t_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_tru_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_t_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_tru_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_t_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_tru_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_t_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_tru_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_t_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_tru_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_t_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_tru_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_t_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_tru_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_t_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_tru_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
@@ -8300,116 +9884,220 @@ v_cmp_tru_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 boun
 // W64: v_cmp_t_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x3f,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_u_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_u_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_u_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xf5,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_u_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_u_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_u_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_u_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_u_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_u_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_u_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_u_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_u_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_u_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_u_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_u_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_u_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_u_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_u_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_u_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_u_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_u_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_u_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_u_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_u_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x11,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_u_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x11,0x7c,0x81,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_u_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_u_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_u_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x11,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_u_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x11,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_u_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp8.s
index 9f02a41603fd2..59f10810abcfc 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp8.s
@@ -4,28 +4,52 @@
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
 
-v_cmp_class_f16_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_class_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_class_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_class_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_class_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_class_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_class_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_class_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_class_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_class_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_class_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0xfa,0x7c,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0xfa,0x7c,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_class_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0xfb,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_class_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0xfb,0x7c,0x81,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_class_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfb,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_class_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfb,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_class_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
@@ -52,28 +76,52 @@ v_cmp_class_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W64: v_cmp_class_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfd,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_eq_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_eq_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_eq_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_eq_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x04,0x7c,0x7f,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_eq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x04,0x7c,0x7f,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_eq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_eq_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x05,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_eq_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x05,0x7c,0x81,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_eq_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_eq_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_eq_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x05,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_eq_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x05,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
@@ -244,28 +292,52 @@ v_cmp_eq_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W64: v_cmp_eq_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x95,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_f_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x00,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_f_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_f_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x00,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_f_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x00,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_f_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x00,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_f_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_f_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x00,0x7c,0x7f,0x00,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_f_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x00,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_f_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_f_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x00,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_f_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x00,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_f_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x00,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_f_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x00,0x7c,0x7f,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_f_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x00,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_f_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_f_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x00,0x7c,0x7f,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_f_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x00,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_f_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_f_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x01,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_f_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x01,0x7c,0x81,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_f_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_f_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x00,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_f_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_f_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x01,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_f_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x01,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_f_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
@@ -340,28 +412,52 @@ v_cmp_f_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W64: v_cmp_f_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x91,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_ge_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_ge_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_ge_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_ge_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0c,0x7c,0x7f,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0c,0x7c,0x7f,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_ge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ge_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0d,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_ge_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0d,0x7c,0x81,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_ge_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_ge_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_ge_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0d,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_ge_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0d,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
@@ -532,28 +628,52 @@ v_cmp_ge_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W64: v_cmp_ge_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x9d,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_gt_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_gt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_gt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_gt_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x08,0x7c,0x7f,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_gt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x08,0x7c,0x7f,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_gt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_gt_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x09,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_gt_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x09,0x7c,0x81,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_gt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_gt_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_gt_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x09,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_gt_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x09,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
@@ -724,28 +844,52 @@ v_cmp_gt_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W64: v_cmp_gt_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x99,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_le_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_le_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_le_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_le_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_le_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x06,0x7c,0x7f,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_le_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_le_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x06,0x7c,0x7f,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_le_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_le_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_le_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x07,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_le_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x07,0x7c,0x81,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_le_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_le_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_le_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x07,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_le_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x07,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
@@ -916,28 +1060,52 @@ v_cmp_le_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W64: v_cmp_le_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x97,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_lg_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_lg_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_lg_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_lg_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0a,0x7c,0x7f,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_lg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0a,0x7c,0x7f,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_lg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_lg_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0b,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_lg_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0b,0x7c,0x81,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_lg_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_lg_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_lg_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0b,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_lg_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0b,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lg_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
@@ -1324,28 +1492,52 @@ v_cmp_ne_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W64: v_cmp_ne_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x9b,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_neq_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_neq_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_neq_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_neq_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x1a,0x7c,0x7f,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_neq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x1a,0x7c,0x7f,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_neq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_neq_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1b,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_neq_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1b,0x7c,0x81,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_neq_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_neq_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_neq_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1b,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_neq_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1b,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
@@ -1372,28 +1564,52 @@ v_cmp_neq_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W64: v_cmp_neq_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x3b,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_nge_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_nge_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_nge_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_nge_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x12,0x7c,0x7f,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x12,0x7c,0x7f,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_nge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nge_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x13,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nge_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x13,0x7c,0x81,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_nge_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_nge_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_nge_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x13,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_nge_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x13,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
@@ -1420,28 +1636,52 @@ v_cmp_nge_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W64: v_cmp_nge_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x33,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_ngt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_ngt_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_ngt_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ngt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_ngt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_ngt_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x16,0x7c,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x16,0x7c,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ngt_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x17,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_ngt_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x17,0x7c,0x81,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_ngt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_ngt_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_ngt_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x17,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_ngt_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x17,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
@@ -1468,28 +1708,52 @@ v_cmp_ngt_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W64: v_cmp_ngt_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x37,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_nle_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_nle_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_nle_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nle_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_nle_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_nle_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x18,0x7c,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x18,0x7c,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nle_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x19,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nle_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x19,0x7c,0x81,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_nle_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_nle_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_nle_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x19,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_nle_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x19,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
@@ -1516,28 +1780,52 @@ v_cmp_nle_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W64: v_cmp_nle_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x39,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_nlg_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_nlg_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_nlg_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_nlg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_nlg_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_nlg_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_nlg_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x14,0x7c,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x14,0x7c,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nlg_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x15,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nlg_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x15,0x7c,0x81,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_nlg_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x15,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_nlg_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x15,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
@@ -1564,28 +1852,52 @@ v_cmp_nlg_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W64: v_cmp_nlg_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x35,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_nlt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_nlt_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_nlt_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_nlt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_nlt_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x1c,0x7c,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x1c,0x7c,0x7f,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_nlt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_nlt_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nlt_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1d,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nlt_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1d,0x7c,0x81,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_nlt_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1d,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_nlt_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1d,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
@@ -1612,28 +1924,52 @@ v_cmp_nlt_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W64: v_cmp_nlt_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x3d,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_o_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_o_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_o_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_o_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_o_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_o_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_o_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_o_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_o_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_o_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0e,0x7c,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0e,0x7c,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_o_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0f,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_o_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0f,0x7c,0x81,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_o_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0f,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_o_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0f,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_o_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
@@ -1660,28 +1996,76 @@ v_cmp_o_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W64: v_cmp_o_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x2f,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_t_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_t_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_t_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_t_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_t_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1e,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_t_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_t_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1e,0x7c,0x7f,0x00,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_t_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_t_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_t_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_t_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_t_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1e,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_t_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_t_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1e,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_t_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x1e,0x7c,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_t_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x1e,0x7c,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_t_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x1e,0x7c,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_t_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x1e,0x7c,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_t_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1f,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_t_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1f,0x7c,0x81,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_t_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1f,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_t_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1f,0x7c,0x81,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_t_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1f,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_t_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1f,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_t_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1f,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_t_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1f,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_t_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
@@ -1756,8 +2140,8 @@ v_cmp_t_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W64: v_cmp_t_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x9f,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_t_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_tru_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_t_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
@@ -1768,8 +2152,8 @@ v_cmp_tru_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W32: v_cmp_t_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1e,0x7c,0x7f,0x00,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_t_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_tru_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_t_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_tru_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
@@ -1804,28 +2188,52 @@ v_cmp_tru_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W64: v_cmp_t_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x3f,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_u_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_u_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_u_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_u_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_u_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_u_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_u_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_u_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x10,0x7c,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x10,0x7c,0x7f,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_u_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_u_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_u_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x11,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_u_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x11,0x7c,0x81,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_u_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x11,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_u_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x11,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_u_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_err.s
index b6b75a222edfe..14692f2fcf624 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_err.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_err.s
@@ -1,128 +1,251 @@
 // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s
 
-v_cmp_class_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_class_f16_e32 vcc, v1.h, v255.h
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_class_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_class_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, vcc_lo, v255.h
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_class_f16_e32 vcc, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_class_f16_e32 vcc_lo, v127.h, v255.h
 // GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_class_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_class_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_class_f16_e32 vcc_lo, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_class_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_class_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_class_f16_e32 vcc_lo, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_class_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:40: error: invalid operand for instruction
+v_cmp_class_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:40: error: invalid operand for instruction
+v_cmp_class_f16_e32 vcc_lo, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_class_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction
+v_cmp_class_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction
+v_cmp_class_f16_e32 vcc_lo, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_class_f16_e32 vcc_lo, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_class_f16_e32 vcc_lo, vcc_lo, v255.h
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_class_f16_e32 vcc_lo, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+v_cmp_eq_f16_e32 vcc, v1.h, v255.h
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+v_cmp_eq_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_eq_f16_e32 vcc, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_eq_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, vcc_lo, v255.h
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16_e32 vcc, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_eq_f16_e32 vcc_lo, v1.h, v255.h
 // GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_eq_f16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16_e32 vcc_lo, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_eq_f16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_eq_f16_e32 vcc_lo, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+v_cmp_eq_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+v_cmp_eq_f16_e32 vcc_lo, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_eq_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_eq_f16_e32 vcc_lo, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc_lo, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc_lo, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc_lo, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16_e32 vcc_lo, vcc_lo, v255.h
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16_e32 vcc_lo, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
 v_cmp_eq_i16_e32 vcc, v1.h, v255.h
 // GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
@@ -388,289 +511,421 @@ v_cmp_eq_u16_e32 vcc_lo, vcc_lo, v255.h
 v_cmp_eq_u16_e32 vcc_lo, vcc_lo, v255.l
 // GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_f_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_f_f16_e32 vcc, v1.h, v255.h
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmp_f_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+v_cmp_f_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmp_f_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+v_cmp_f_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmp_f_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_f_f16_e32 vcc, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmp_f_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_f_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmp_f_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_f_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmp_f_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_f_f16_e32 vcc, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_f_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+v_cmp_f_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_f_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+v_cmp_f_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_f_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_f_f16_e32 vcc, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_f_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_f_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_f_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_f_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_f_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_f_f16_e32 vcc, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
 
-v_cmp_f_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_f_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
 
-v_cmp_f_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_f_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
 
-v_cmp_f_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_f_f16_e32 vcc, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
 
-v_cmp_f_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_f_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
 
-v_cmp_f_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_f_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
 
-v_cmp_f_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_f_f16_e32 vcc, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_f_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_f_f16_e32 vcc, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_f_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_f_f16_e32 vcc, vcc_lo, v255.h
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_f_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_f_f16_e32 vcc, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_f_f16_e32 vcc_lo, v1.h, v255.h
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+v_cmp_f_f16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+v_cmp_f_f16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_f_f16_e32 vcc_lo, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_f_f16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_f_f16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_f_f16_e32 vcc_lo, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+v_cmp_f_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+v_cmp_f_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_f_f16_e32 vcc_lo, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_f_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_f_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_f_f16_e32 vcc_lo, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_f_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_f_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+v_cmp_f_f16_e32 vcc_lo, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+v_cmp_f_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_f_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_f_f16_e32 vcc_lo, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_f_f16_e32 vcc_lo, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_f_f16_e32 vcc_lo, vcc_lo, v255.h
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_f_f16_e32 vcc_lo, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc, v1.h, v255.h
+v_cmp_ge_f16_e32 vcc, v1.h, v255.h
 // GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ge_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+v_cmp_ge_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc, v1.l, v255.l
+v_cmp_ge_f16_e32 vcc, v1.l, v255.l
 // GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ge_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+v_cmp_ge_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc, v127.h, v255.h
+v_cmp_ge_f16_e32 vcc, v127.h, v255.h
 // GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ge_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+v_cmp_ge_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc, v127.l, v255.l
+v_cmp_ge_f16_e32 vcc, v127.l, v255.l
 // GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ge_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+v_cmp_ge_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc, v128.h, v2.h
+v_cmp_ge_f16_e32 vcc, v128.h, v2.h
 // GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ge_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+v_cmp_ge_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc, v128.l, v2.l
+v_cmp_ge_f16_e32 vcc, v128.l, v2.l
 // GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ge_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+v_cmp_ge_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc, vcc_hi, v255.h
+v_cmp_ge_f16_e32 vcc, vcc_hi, v255.h
 // GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc, vcc_hi, v255.l
+v_cmp_ge_f16_e32 vcc, vcc_hi, v255.l
 // GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc, vcc_lo, v255.h
+v_cmp_ge_f16_e32 vcc, vcc_lo, v255.h
 // GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc, vcc_lo, v255.l
+v_cmp_ge_f16_e32 vcc, vcc_lo, v255.l
 // GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc_lo, v1.h, v255.h
+v_cmp_ge_f16_e32 vcc_lo, v1.h, v255.h
 // GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ge_f16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+v_cmp_ge_f16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc_lo, v1.l, v255.l
+v_cmp_ge_f16_e32 vcc_lo, v1.l, v255.l
 // GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ge_f16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+v_cmp_ge_f16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc_lo, v127.h, v255.h
+v_cmp_ge_f16_e32 vcc_lo, v127.h, v255.h
 // GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ge_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+v_cmp_ge_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc_lo, v127.l, v255.l
+v_cmp_ge_f16_e32 vcc_lo, v127.l, v255.l
 // GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ge_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+v_cmp_ge_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc_lo, v128.h, v2.h
+v_cmp_ge_f16_e32 vcc_lo, v128.h, v2.h
 // GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ge_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+v_cmp_ge_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc_lo, v128.l, v2.l
+v_cmp_ge_f16_e32 vcc_lo, v128.l, v2.l
 // GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ge_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+v_cmp_ge_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc_lo, vcc_hi, v255.h
+v_cmp_ge_f16_e32 vcc_lo, vcc_hi, v255.h
 // GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc_lo, vcc_hi, v255.l
+v_cmp_ge_f16_e32 vcc_lo, vcc_hi, v255.l
 // GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc_lo, vcc_lo, v255.h
+v_cmp_ge_f16_e32 vcc_lo, vcc_lo, v255.h
 // GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc_lo, vcc_lo, v255.l
+v_cmp_ge_f16_e32 vcc_lo, vcc_lo, v255.l
 // GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_ge_u16_e32 vcc, v1.h, v255.h
+v_cmp_ge_i16_e32 vcc, v1.h, v255.h
 // GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_ge_u16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ge_i16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_ge_u16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+v_cmp_ge_i16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_ge_u16_e32 vcc, v1.l, v255.l
+v_cmp_ge_i16_e32 vcc, v1.l, v255.l
 // GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_ge_u16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ge_i16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_ge_u16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+v_cmp_ge_i16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_ge_u16_e32 vcc, v127.h, v255.h
+v_cmp_ge_i16_e32 vcc, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc, vcc_lo, v255.h
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, v1.h, v255.h
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, vcc_lo, v255.h
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ge_u16_e32 vcc, v1.h, v255.h
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+v_cmp_ge_u16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+v_cmp_ge_u16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+v_cmp_ge_u16_e32 vcc, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+v_cmp_ge_u16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+v_cmp_ge_u16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+v_cmp_ge_u16_e32 vcc, v127.h, v255.h
 // GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
 v_cmp_ge_u16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
@@ -784,71 +1039,137 @@ v_cmp_ge_u16_e32 vcc_lo, vcc_lo, v255.h
 v_cmp_ge_u16_e32 vcc_lo, vcc_lo, v255.l
 // GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_gt_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_gt_f16_e32 vcc, v1.h, v255.h
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_gt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+v_cmp_gt_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_gt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+v_cmp_gt_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_gt_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_gt_f16_e32 vcc, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_gt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_gt_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_gt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_gt_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, vcc_lo, v255.h
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_gt_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_gt_f16_e32 vcc_lo, v1.h, v255.h
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_gt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_gt_f16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_gt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_gt_f16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_gt_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_gt_f16_e32 vcc_lo, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_gt_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_gt_f16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_gt_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_gt_f16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_gt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_gt_f16_e32 vcc_lo, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_gt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_gt_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_gt_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_gt_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_gt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+v_cmp_gt_f16_e32 vcc_lo, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_gt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+v_cmp_gt_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_gt_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_gt_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_gt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_gt_f16_e32 vcc_lo, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
 
-v_cmp_gt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_gt_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_gt_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_gt_f16_e32 vcc_lo, vcc_lo, v255.h
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_gt_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_gt_f16_e32 vcc_lo, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
 v_cmp_gt_i16_e32 vcc, v1.h, v255.h
 // GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
@@ -1114,115 +1435,181 @@ v_cmp_gt_u16_e32 vcc_lo, vcc_lo, v255.h
 v_cmp_gt_u16_e32 vcc_lo, vcc_lo, v255.l
 // GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_le_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_le_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_le_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_le_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_le_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_le_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_le_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_le_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_le_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_le_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_le_i16_e32 vcc, v1.h, v255.h
+v_cmp_le_f16_e32 vcc, v1.h, v255.h
 // GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_le_i16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_le_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_le_i16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+v_cmp_le_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_le_i16_e32 vcc, v1.l, v255.l
+v_cmp_le_f16_e32 vcc, v1.l, v255.l
 // GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_le_i16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_le_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_le_i16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+v_cmp_le_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_le_i16_e32 vcc, v127.h, v255.h
+v_cmp_le_f16_e32 vcc, v127.h, v255.h
 // GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_le_i16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_le_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_le_i16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+v_cmp_le_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_le_i16_e32 vcc, v127.l, v255.l
+v_cmp_le_f16_e32 vcc, v127.l, v255.l
 // GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_le_i16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_le_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_le_i16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+v_cmp_le_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_le_i16_e32 vcc, v128.h, v2.h
+v_cmp_le_f16_e32 vcc, v128.h, v2.h
 // GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
 
-v_cmp_le_i16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_le_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
 
-v_cmp_le_i16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+v_cmp_le_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, vcc_lo, v255.h
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v1.h, v255.h
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, vcc_lo, v255.h
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc, v1.h, v255.h
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
 
 v_cmp_le_i16_e32 vcc, v128.l, v2.l
@@ -1444,71 +1831,137 @@ v_cmp_le_u16_e32 vcc_lo, vcc_lo, v255.h
 v_cmp_le_u16_e32 vcc_lo, vcc_lo, v255.l
 // GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16_e32 vcc, v1.h, v255.h
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+v_cmp_lg_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+v_cmp_lg_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16_e32 vcc, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_lg_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_lg_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, vcc_lo, v255.h
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16_e32 vcc, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_lg_f16_e32 vcc_lo, v1.h, v255.h
 // GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_lg_f16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16_e32 vcc_lo, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_lg_f16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_lg_f16_e32 vcc_lo, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+v_cmp_lg_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+v_cmp_lg_f16_e32 vcc_lo, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_lg_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_lg_f16_e32 vcc_lo, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc_lo, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc_lo, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc_lo, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16_e32 vcc_lo, vcc_lo, v255.h
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16_e32 vcc_lo, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
 v_cmp_lt_f16_e32 vcc, v1.h, v255.h
 // GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
@@ -2170,532 +2623,1060 @@ v_cmp_ne_u16_e32 vcc_lo, vcc_lo, v255.h
 v_cmp_ne_u16_e32 vcc_lo, vcc_lo, v255.l
 // GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_neq_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc, v1.h, v255.h
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc, vcc_lo, v255.h
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_neq_f16_e32 vcc_lo, v1.h, v255.h
 // GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_neq_f16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_nge_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
-
-v_cmp_nge_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
-
-v_cmp_nge_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_nge_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_neq_f16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_neq_f16_e32 vcc_lo, v1.l, v255.l
 // GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_nge_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc_lo, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc_lo, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc_lo, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc_lo, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc_lo, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc_lo, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_neq_f16_e32 vcc_lo, vcc_lo, v255.h
 // GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_neq_f16_e32 vcc_lo, vcc_lo, v255.l
 // GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16_e32 vcc, v1.h, v255.h
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_nge_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_nge_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16_e32 vcc, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nge_f16_e32 vcc, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nge_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction
+v_cmp_nge_f16_e32 vcc, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction
+v_cmp_nge_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nge_f16_e32 vcc, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nge_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16_e32 vcc, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_nge_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_nge_f16_e32 vcc, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16_e32 vcc, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_nge_f16_e32 vcc, vcc_lo, v255.h
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v1.h, v255.h
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, vcc_lo, v255.h
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v1.h, v255.h
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, vcc_lo, v255.h
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v1.h, v255.h
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, vcc_lo, v255.h
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v1.h, v255.h
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, vcc_lo, v255.h
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v1.h, v255.h
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, vcc_lo, v255.h
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v1.h, v255.h
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, vcc_lo, v255.h
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc_lo, v1.h, v255.h
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc_lo, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc_lo, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc_lo, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_nlg_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlg_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_nlg_f16_e32 vcc_lo, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_nlg_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc_lo, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlg_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlg_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlg_f16_e32 vcc_lo, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nlg_f16_e32 vcc_lo, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nlg_f16_e32 vcc_lo, vcc_lo, v255.h
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlg_f16_e32 vcc_lo, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction
+v_cmp_nlt_f16_e32 vcc, v1.h, v255.h
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction
+v_cmp_nlt_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlt_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nlt_f16_e32 vcc, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nlt_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlt_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlt_f16_e32 vcc, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlt_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_nlt_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_nlt_f16_e32 vcc, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlt_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_nlt_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_nlt_f16_e32 vcc, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlt_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, vcc_lo, v255.h
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_nlt_f16_e32 vcc_lo, v1.h, v255.h
 // GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_nlt_f16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlt_f16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlt_f16_e32 vcc_lo, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlt_f16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nlt_f16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nlt_f16_e32 vcc_lo, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlt_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction
+v_cmp_nlt_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction
+v_cmp_nlt_f16_e32 vcc_lo, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlt_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nlt_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nlt_f16_e32 vcc_lo, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlt_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlt_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlt_f16_e32 vcc_lo, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_nlt_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_nlt_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlt_f16_e32 vcc_lo, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_nlt_f16_e32 vcc_lo, vcc_hi, v255.l
 // GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_nlt_f16_e32 vcc_lo, vcc_lo, v255.h
 // GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlt_f16_e32 vcc_lo, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_o_f16_e32 vcc, v1.h, v255.h
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_o_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16_e32 vcc, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_o_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_o_f16_e32 vcc, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction
+v_cmp_o_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction
+v_cmp_o_f16_e32 vcc, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_o_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_o_f16_e32 vcc, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16_e32 vcc, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+v_cmp_o_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+v_cmp_o_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16_e32 vcc, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_o_f16_e32 vcc, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_o_f16_e32 vcc, vcc_lo, v255.h
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16_e32 vcc, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_o_f16_e32 vcc_lo, v1.h, v255.h
 // GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_o_f16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16_e32 vcc_lo, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_o_f16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_o_f16_e32 vcc_lo, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_o_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_o_f16_e32 vcc_lo, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_o_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_o_f16_e32 vcc_lo, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
 
-v_cmp_t_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+v_cmp_o_f16_e32 vcc_lo, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
 
-v_cmp_t_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+v_cmp_o_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
 
-v_cmp_t_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+v_cmp_o_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc_lo, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc_lo, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc_lo, vcc_lo, v255.h
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc_lo, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_t_f16_e32 vcc, v127, v255
+v_cmp_t_f16_e32 vcc, v1.h, v255.h
 // GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmp_t_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_t_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmp_t_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_t_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmp_t_f16_e32 vcc, v128, v2
+v_cmp_t_f16_e32 vcc, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+v_cmp_t_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+v_cmp_t_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+v_cmp_t_f16_e32 vcc, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_t_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_t_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_t_f16_e32 vcc, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_t_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_t_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_t_f16_e32 vcc, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
+
+v_cmp_t_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
+
+v_cmp_t_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
+
+v_cmp_t_f16_e32 vcc, v128.l, v2.l
 // GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
 
-v_cmp_t_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_t_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
 
-v_cmp_t_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_t_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
 
-v_cmp_t_f16_e32 vcc, vcc_hi, v255
+v_cmp_t_f16_e32 vcc, vcc_hi, v255.h
 // GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_t_f16_e32 vcc, vcc_lo, v255
+v_cmp_t_f16_e32 vcc, vcc_hi, v255.l
 // GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_t_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
+v_cmp_t_f16_e32 vcc, vcc_lo, v255.h
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_t_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
+v_cmp_t_f16_e32 vcc, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_t_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
+v_cmp_t_f16_e32 vcc_lo, v1.h, v255.h
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_t_f16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_t_f16_e32 vcc_lo, v127, v255
+v_cmp_t_f16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_t_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_t_f16_e32 vcc_lo, v1.l, v255.l
 // GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_t_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_t_f16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_t_f16_e32 vcc_lo, v128, v2
+v_cmp_t_f16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_t_f16_e32 vcc_lo, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_t_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_t_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_t_f16_e32 vcc_lo, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_t_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_t_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_t_f16_e32 vcc_lo, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+v_cmp_t_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
 
-v_cmp_t_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_t_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
 
-v_cmp_t_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_t_f16_e32 vcc_lo, v128.l, v2.l
 // GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
 
-v_cmp_t_f16_e32 vcc_lo, vcc_hi, v255
+v_cmp_t_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+v_cmp_t_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+v_cmp_t_f16_e32 vcc_lo, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_t_f16_e32 vcc_lo, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_t_f16_e32 vcc_lo, vcc_lo, v255.h
 // GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_t_f16_e32 vcc_lo, vcc_lo, v255
+v_cmp_t_f16_e32 vcc_lo, vcc_lo, v255.l
 // GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
 v_cmp_tru_f16_e32 vcc, v1, v255
@@ -2764,68 +3745,134 @@ v_cmp_tru_f16_e32 vcc_lo, vcc_hi, v255
 v_cmp_tru_f16_e32 vcc_lo, vcc_lo, v255
 // GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_u_f16_e32 vcc, v1.h, v255.h
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_u_f16_e32 vcc, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, vcc_lo, v255.h
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_u_f16_e32 vcc, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_u_f16_e32 vcc_lo, v1.h, v255.h
 // GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_u_f16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_u_f16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_u_f16_e32 vcc_lo, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_u_f16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc_lo, v127.h, v255.h
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_u_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc_lo, v127.l, v255.l
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_u_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc_lo, v128.h, v2.h
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_u_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc_lo, v128.l, v2.l
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc_lo, vcc_hi, v255.h
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc_lo, vcc_hi, v255.l
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc_lo, vcc_lo, v255.h
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_u_f16_e32 vcc_lo, vcc_lo, v255.l
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s
index d29cf0acace69..d3eba050d6be7 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s
@@ -1,128 +1,251 @@
 // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 %s
 
-v_cmp_class_f16 vcc, v1, v255
-// GFX11: v_cmp_class_f16_e64 vcc, v1, v255       ; encoding: [0x6a,0x00,0x7d,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_class_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_class_f16_e64 vcc, v1.h, v255.h   ; encoding: [0x6a,0x18,0x7d,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_class_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_class_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_class_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_class_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_class_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_class_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_class_f16 vcc, v127, v255
-// GFX11: v_cmp_class_f16_e64 vcc, v127, v255     ; encoding: [0x6a,0x00,0x7d,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_class_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_class_f16_e64 vcc, v1.l, v255.l   ; encoding: [0x6a,0x00,0x7d,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_class_f16 vcc, v127, v255
-// GFX11: v_cmp_class_f16_e64 vcc, v127, v255     ; encoding: [0x6a,0x00,0x7d,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_class_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_class_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_class_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_class_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_class_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_class_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_class_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_class_f16_e64 vcc, v127.h, v255.h ; encoding: [0x6a,0x18,0x7d,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_class_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_class_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_class_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_class_f16_e64 vcc, v127.h, v255.h ; encoding: [0x6a,0x18,0x7d,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_class_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_class_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_class_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_class_f16 vcc, v128, v2
-// GFX11: v_cmp_class_f16_e64 vcc, v128, v2       ; encoding: [0x6a,0x00,0x7d,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_class_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_class_f16 vcc, v128, v2
-// GFX11: v_cmp_class_f16_e64 vcc, v128, v2       ; encoding: [0x6a,0x00,0x7d,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_class_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_class_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_class_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_class_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_class_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_class_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_class_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_class_f16_e64 vcc, v127.l, v255.l ; encoding: [0x6a,0x00,0x7d,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_class_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_class_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_class_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_class_f16_e64 vcc, v127.l, v255.l ; encoding: [0x6a,0x00,0x7d,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_class_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_class_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_class_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_class_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_class_f16_e64 vcc, vcc_hi, v255   ; encoding: [0x6a,0x00,0x7d,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_class_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_class_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_class_f16_e64 vcc, vcc_hi, v255   ; encoding: [0x6a,0x00,0x7d,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_class_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_class_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_class_f16_e64 vcc, vcc_lo, v255   ; encoding: [0x6a,0x00,0x7d,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_class_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_class_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_class_f16_e64 vcc, vcc_lo, v255   ; encoding: [0x6a,0x00,0x7d,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_class_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_class_f16_e64 vcc, v128.h, v2.h   ; encoding: [0x6a,0x18,0x7d,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_eq_f16 vcc, v1, v255
-// GFX11: v_cmp_eq_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x02,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_class_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_class_f16_e64 vcc, v128.h, v2.h   ; encoding: [0x6a,0x18,0x7d,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_eq_f16 vcc, v1, v255
-// GFX11: v_cmp_eq_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x02,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_class_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_eq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_eq_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_class_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_eq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_eq_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_class_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_eq_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_eq_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_class_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_eq_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_eq_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_class_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_class_f16_e64 vcc, v128.l, v2.l   ; encoding: [0x6a,0x00,0x7d,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_eq_f16 vcc, v127, v255
-// GFX11: v_cmp_eq_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x02,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_class_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_class_f16_e64 vcc, v128.l, v2.l   ; encoding: [0x6a,0x00,0x7d,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_eq_f16 vcc, v127, v255
-// GFX11: v_cmp_eq_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x02,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_class_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_eq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_eq_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_class_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_eq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_eq_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_class_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_eq_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_eq_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_class_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_eq_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_eq_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_class_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_class_f16_e64 vcc, vcc_hi, v255.h ; encoding: [0x6a,0x10,0x7d,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_eq_f16 vcc, v128, v2
-// GFX11: v_cmp_eq_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x02,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_class_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_class_f16_e64 vcc, vcc_hi, v255.h ; encoding: [0x6a,0x10,0x7d,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_eq_f16 vcc, v128, v2
-// GFX11: v_cmp_eq_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x02,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_class_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_class_f16_e64 vcc, vcc_hi, v255.l ; encoding: [0x6a,0x00,0x7d,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_eq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_eq_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_class_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_class_f16_e64 vcc, vcc_hi, v255.l ; encoding: [0x6a,0x00,0x7d,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_eq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_eq_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_class_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_class_f16_e64 vcc, vcc_lo, v255.h ; encoding: [0x6a,0x10,0x7d,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_eq_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_eq_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_class_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_class_f16_e64 vcc, vcc_lo, v255.h ; encoding: [0x6a,0x10,0x7d,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_eq_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_eq_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_class_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_class_f16_e64 vcc, vcc_lo, v255.l ; encoding: [0x6a,0x00,0x7d,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_eq_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_eq_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x02,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_class_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_class_f16_e64 vcc, vcc_lo, v255.l ; encoding: [0x6a,0x00,0x7d,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_eq_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_eq_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x02,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_eq_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_eq_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x02,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_eq_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_eq_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x02,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_eq_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_eq_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x02,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_eq_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_eq_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x02,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_eq_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_eq_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_eq_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_eq_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_eq_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_eq_f16_e64 vcc, v1.l, v255.l      ; encoding: [0x6a,0x00,0x02,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_eq_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_eq_f16_e64 vcc, v1.l, v255.l      ; encoding: [0x6a,0x00,0x02,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_eq_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_eq_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_eq_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_eq_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_eq_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_eq_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x02,0xd4,0x7f,0xff,0x03,0x00]
+
+v_cmp_eq_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_eq_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x02,0xd4,0x7f,0xff,0x03,0x00]
+
+v_cmp_eq_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+
+v_cmp_eq_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+
+v_cmp_eq_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+
+v_cmp_eq_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+
+v_cmp_eq_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_eq_f16_e64 vcc, v127.l, v255.l    ; encoding: [0x6a,0x00,0x02,0xd4,0x7f,0xff,0x03,0x00]
+
+v_cmp_eq_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_eq_f16_e64 vcc, v127.l, v255.l    ; encoding: [0x6a,0x00,0x02,0xd4,0x7f,0xff,0x03,0x00]
+
+v_cmp_eq_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+
+v_cmp_eq_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+
+v_cmp_eq_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+
+v_cmp_eq_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+
+v_cmp_eq_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_eq_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x02,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_eq_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_eq_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x02,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_eq_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x02,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_eq_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x02,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_eq_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x02,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_eq_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x02,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_eq_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_eq_f16_e64 vcc, v128.l, v2.l      ; encoding: [0x6a,0x00,0x02,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_eq_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_eq_f16_e64 vcc, v128.l, v2.l      ; encoding: [0x6a,0x00,0x02,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_eq_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_eq_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_eq_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_eq_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_eq_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_eq_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x02,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_eq_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_eq_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x02,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_eq_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_eq_f16_e64 vcc, vcc_hi, v255.l    ; encoding: [0x6a,0x00,0x02,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_eq_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_eq_f16_e64 vcc, vcc_hi, v255.l    ; encoding: [0x6a,0x00,0x02,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_eq_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_eq_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x02,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_eq_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_eq_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x02,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_eq_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_eq_f16_e64 vcc, vcc_lo, v255.l    ; encoding: [0x6a,0x00,0x02,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_eq_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_eq_f16_e64 vcc, vcc_lo, v255.l    ; encoding: [0x6a,0x00,0x02,0xd4,0x6a,0xfe,0x03,0x00]
 
 v_cmp_eq_i16 vcc, v1.h, v255.h
 // GFX11: v_cmp_eq_i16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x32,0xd4,0x01,0xff,0x03,0x00]
@@ -388,137 +511,269 @@ v_cmp_eq_u16 vcc, vcc_lo, v255.l
 v_cmp_eq_u16 vcc, vcc_lo, v255.l
 // GFX11: v_cmp_eq_u16_e64 vcc, vcc_lo, v255.l    ; encoding: [0x6a,0x00,0x3a,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_f_f16 vcc, v1, v255
-// GFX11: v_cmp_f_f16_e64 vcc, v1, v255           ; encoding: [0x6a,0x00,0x00,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_f_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_f_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x00,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_f_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_f_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x00,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_f_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x00,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_f_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x00,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_f_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x00,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_f_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x00,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_f_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_f_f16_e64 vcc, v1.l, v255.l       ; encoding: [0x6a,0x00,0x00,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_f_f16 vcc, v1, v255
-// GFX11: v_cmp_f_f16_e64 vcc, v1, v255           ; encoding: [0x6a,0x00,0x00,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_f_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_f_f16_e64 vcc, v1.l, v255.l       ; encoding: [0x6a,0x00,0x00,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_f_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_f_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x00,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_f_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x00,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_f_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_f_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x00,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_f_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x00,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_f_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_f_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x00,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_f_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x00,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_f_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_f_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x00,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_f_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x00,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_f_f16 vcc, v127, v255
-// GFX11: v_cmp_f_f16_e64 vcc, v127, v255         ; encoding: [0x6a,0x00,0x00,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_f_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_f_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x00,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_f_f16 vcc, v127, v255
-// GFX11: v_cmp_f_f16_e64 vcc, v127, v255         ; encoding: [0x6a,0x00,0x00,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_f_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_f_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x00,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_f_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_f_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x00,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_f_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x00,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_f_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_f_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x00,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_f_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x00,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_f_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_f_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x00,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_f_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x00,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_f_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_f_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x00,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_f_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x00,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_f_f16 vcc, v128, v2
-// GFX11: v_cmp_f_f16_e64 vcc, v128, v2           ; encoding: [0x6a,0x00,0x00,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_f_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_f_f16_e64 vcc, v127.l, v255.l     ; encoding: [0x6a,0x00,0x00,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_f_f16 vcc, v128, v2
-// GFX11: v_cmp_f_f16_e64 vcc, v128, v2           ; encoding: [0x6a,0x00,0x00,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_f_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_f_f16_e64 vcc, v127.l, v255.l     ; encoding: [0x6a,0x00,0x00,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_f_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_f_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_f_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x00,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_f_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_f_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_f_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x00,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_f_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_f_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_f_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x00,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_f_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_f_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_f_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x00,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_f_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_f_f16_e64 vcc, vcc_hi, v255       ; encoding: [0x6a,0x00,0x00,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_f_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_f_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x00,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_f_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_f_f16_e64 vcc, vcc_hi, v255       ; encoding: [0x6a,0x00,0x00,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_f_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_f_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x00,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_f_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_f_f16_e64 vcc, vcc_lo, v255       ; encoding: [0x6a,0x00,0x00,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_f_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x00,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_f_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_f_f16_e64 vcc, vcc_lo, v255       ; encoding: [0x6a,0x00,0x00,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_f_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x00,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_ge_f16 vcc, v1, v255
-// GFX11: v_cmp_ge_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x06,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_f_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x00,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_ge_f16 vcc, v1, v255
-// GFX11: v_cmp_ge_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x06,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_f_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x00,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_ge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ge_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_f_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_f_f16_e64 vcc, v128.l, v2.l       ; encoding: [0x6a,0x00,0x00,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_ge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ge_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_f_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_f_f16_e64 vcc, v128.l, v2.l       ; encoding: [0x6a,0x00,0x00,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_ge_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ge_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_f_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_ge_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ge_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_f_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_ge_f16 vcc, v127, v255
-// GFX11: v_cmp_ge_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x06,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_f_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_ge_f16 vcc, v127, v255
-// GFX11: v_cmp_ge_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x06,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_f_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_ge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ge_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_f_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_f_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x00,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_ge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ge_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_f_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_f_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x00,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_ge_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ge_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_f_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_f_f16_e64 vcc, vcc_hi, v255.l     ; encoding: [0x6a,0x00,0x00,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_ge_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ge_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_f_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_f_f16_e64 vcc, vcc_hi, v255.l     ; encoding: [0x6a,0x00,0x00,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_ge_f16 vcc, v128, v2
-// GFX11: v_cmp_ge_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x06,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_f_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_f_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x00,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_ge_f16 vcc, v128, v2
-// GFX11: v_cmp_ge_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x06,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_f_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_f_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x00,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_ge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ge_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_f_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_f_f16_e64 vcc, vcc_lo, v255.l     ; encoding: [0x6a,0x00,0x00,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_ge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ge_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_f_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_f_f16_e64 vcc, vcc_lo, v255.l     ; encoding: [0x6a,0x00,0x00,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_ge_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ge_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_ge_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_ge_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x06,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_ge_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ge_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_ge_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_ge_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x06,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_ge_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_ge_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x06,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_ge_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_ge_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_ge_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x06,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_ge_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_ge_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_ge_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x06,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_ge_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_ge_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_ge_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x06,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_ge_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_ge_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_ge_f16_e64 vcc, v1.l, v255.l      ; encoding: [0x6a,0x00,0x06,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_ge_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_ge_f16_e64 vcc, v1.l, v255.l      ; encoding: [0x6a,0x00,0x06,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_ge_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_ge_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_ge_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_ge_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_ge_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_ge_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x06,0xd4,0x7f,0xff,0x03,0x00]
+
+v_cmp_ge_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_ge_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x06,0xd4,0x7f,0xff,0x03,0x00]
+
+v_cmp_ge_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+
+v_cmp_ge_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+
+v_cmp_ge_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+
+v_cmp_ge_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+
+v_cmp_ge_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_ge_f16_e64 vcc, v127.l, v255.l    ; encoding: [0x6a,0x00,0x06,0xd4,0x7f,0xff,0x03,0x00]
+
+v_cmp_ge_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_ge_f16_e64 vcc, v127.l, v255.l    ; encoding: [0x6a,0x00,0x06,0xd4,0x7f,0xff,0x03,0x00]
+
+v_cmp_ge_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+
+v_cmp_ge_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+
+v_cmp_ge_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+
+v_cmp_ge_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+
+v_cmp_ge_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_ge_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x06,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_ge_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_ge_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x06,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_ge_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x06,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_ge_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x06,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_ge_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x06,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_ge_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x06,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_ge_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_ge_f16_e64 vcc, v128.l, v2.l      ; encoding: [0x6a,0x00,0x06,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_ge_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_ge_f16_e64 vcc, v128.l, v2.l      ; encoding: [0x6a,0x00,0x06,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_ge_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_ge_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_ge_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_ge_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_ge_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_ge_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x06,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_ge_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_ge_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x06,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_ge_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_ge_f16_e64 vcc, vcc_hi, v255.l    ; encoding: [0x6a,0x00,0x06,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_ge_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_ge_f16_e64 vcc, vcc_hi, v255.l    ; encoding: [0x6a,0x00,0x06,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_ge_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_ge_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x06,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_ge_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_ge_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x06,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_ge_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_ge_f16_e64 vcc, vcc_lo, v255.l    ; encoding: [0x6a,0x00,0x06,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_ge_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_ge_f16_e64 vcc, vcc_lo, v255.l    ; encoding: [0x6a,0x00,0x06,0xd4,0x6a,0xfe,0x03,0x00]
 
 v_cmp_ge_i16 vcc, v1.h, v255.h
 // GFX11: v_cmp_ge_i16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x36,0xd4,0x01,0xff,0x03,0x00]
@@ -784,71 +1039,137 @@ v_cmp_ge_u16 vcc, vcc_lo, v255.l
 v_cmp_ge_u16 vcc, vcc_lo, v255.l
 // GFX11: v_cmp_ge_u16_e64 vcc, vcc_lo, v255.l    ; encoding: [0x6a,0x00,0x3e,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_gt_f16 vcc, v1, v255
-// GFX11: v_cmp_gt_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x04,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_gt_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_gt_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x04,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_gt_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_gt_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x04,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_gt_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_gt_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_gt_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_gt_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_gt_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_gt_f16_e64 vcc, v1.l, v255.l      ; encoding: [0x6a,0x00,0x04,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_gt_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_gt_f16_e64 vcc, v1.l, v255.l      ; encoding: [0x6a,0x00,0x04,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_gt_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_gt_f16 vcc, v1, v255
-// GFX11: v_cmp_gt_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x04,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_gt_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_gt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_gt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_gt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_gt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_gt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_gt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_gt_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_gt_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x04,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_gt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_gt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_gt_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_gt_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x04,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_gt_f16 vcc, v127, v255
-// GFX11: v_cmp_gt_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x04,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_gt_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_gt_f16 vcc, v127, v255
-// GFX11: v_cmp_gt_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x04,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_gt_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_gt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_gt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_gt_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_gt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_gt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_gt_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_gt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_gt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_gt_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_gt_f16_e64 vcc, v127.l, v255.l    ; encoding: [0x6a,0x00,0x04,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_gt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_gt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_gt_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_gt_f16_e64 vcc, v127.l, v255.l    ; encoding: [0x6a,0x00,0x04,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_gt_f16 vcc, v128, v2
-// GFX11: v_cmp_gt_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x04,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_gt_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_gt_f16 vcc, v128, v2
-// GFX11: v_cmp_gt_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x04,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_gt_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_gt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_gt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_gt_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_gt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_gt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_gt_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_gt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_gt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_gt_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_gt_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x04,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_gt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_gt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_gt_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_gt_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x04,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_gt_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_gt_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x04,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_gt_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x04,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_gt_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_gt_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x04,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_gt_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x04,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_gt_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_gt_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x04,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_gt_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x04,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_gt_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_gt_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x04,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_gt_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x04,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_gt_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_gt_f16_e64 vcc, v128.l, v2.l      ; encoding: [0x6a,0x00,0x04,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_gt_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_gt_f16_e64 vcc, v128.l, v2.l      ; encoding: [0x6a,0x00,0x04,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_gt_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_gt_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_gt_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_gt_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_gt_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_gt_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x04,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_gt_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_gt_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x04,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_gt_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_gt_f16_e64 vcc, vcc_hi, v255.l    ; encoding: [0x6a,0x00,0x04,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_gt_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_gt_f16_e64 vcc, vcc_hi, v255.l    ; encoding: [0x6a,0x00,0x04,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_gt_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_gt_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x04,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_gt_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_gt_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x04,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_gt_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_gt_f16_e64 vcc, vcc_lo, v255.l    ; encoding: [0x6a,0x00,0x04,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_gt_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_gt_f16_e64 vcc, vcc_lo, v255.l    ; encoding: [0x6a,0x00,0x04,0xd4,0x6a,0xfe,0x03,0x00]
 
 v_cmp_gt_i16 vcc, v1.h, v255.h
 // GFX11: v_cmp_gt_i16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x34,0xd4,0x01,0xff,0x03,0x00]
@@ -1114,71 +1435,137 @@ v_cmp_gt_u16 vcc, vcc_lo, v255.l
 v_cmp_gt_u16 vcc, vcc_lo, v255.l
 // GFX11: v_cmp_gt_u16_e64 vcc, vcc_lo, v255.l    ; encoding: [0x6a,0x00,0x3c,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_le_f16 vcc, v1, v255
-// GFX11: v_cmp_le_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x03,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_le_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_le_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x03,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_le_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_le_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x03,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_le_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_le_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_le_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_le_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_le_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_le_f16_e64 vcc, v1.l, v255.l      ; encoding: [0x6a,0x00,0x03,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_le_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_le_f16_e64 vcc, v1.l, v255.l      ; encoding: [0x6a,0x00,0x03,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_le_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_le_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_le_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_le_f16 vcc, v1, v255
-// GFX11: v_cmp_le_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x03,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_le_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_le_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_le_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_le_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_le_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x03,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_le_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_le_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_le_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_le_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x03,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_le_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_le_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_le_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_le_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_le_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_le_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_le_f16 vcc, v127, v255
-// GFX11: v_cmp_le_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x03,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_le_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_le_f16 vcc, v127, v255
-// GFX11: v_cmp_le_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x03,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_le_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_le_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_le_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_le_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_le_f16_e64 vcc, v127.l, v255.l    ; encoding: [0x6a,0x00,0x03,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_le_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_le_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_le_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_le_f16_e64 vcc, v127.l, v255.l    ; encoding: [0x6a,0x00,0x03,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_le_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_le_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_le_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_le_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_le_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_le_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_le_f16 vcc, v128, v2
-// GFX11: v_cmp_le_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x03,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_le_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_le_f16 vcc, v128, v2
-// GFX11: v_cmp_le_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x03,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_le_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_le_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_le_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_le_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_le_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x03,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_le_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_le_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_le_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_le_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x03,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_le_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_le_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_le_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x03,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_le_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_le_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_le_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x03,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_le_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_le_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x03,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_le_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x03,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_le_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_le_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x03,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_le_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x03,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_le_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_le_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x03,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_le_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_le_f16_e64 vcc, v128.l, v2.l      ; encoding: [0x6a,0x00,0x03,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_le_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_le_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x03,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_le_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_le_f16_e64 vcc, v128.l, v2.l      ; encoding: [0x6a,0x00,0x03,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_le_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_le_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_le_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_le_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_le_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_le_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x03,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_le_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_le_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x03,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_le_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_le_f16_e64 vcc, vcc_hi, v255.l    ; encoding: [0x6a,0x00,0x03,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_le_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_le_f16_e64 vcc, vcc_hi, v255.l    ; encoding: [0x6a,0x00,0x03,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_le_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_le_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x03,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_le_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_le_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x03,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_le_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_le_f16_e64 vcc, vcc_lo, v255.l    ; encoding: [0x6a,0x00,0x03,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_le_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_le_f16_e64 vcc, vcc_lo, v255.l    ; encoding: [0x6a,0x00,0x03,0xd4,0x6a,0xfe,0x03,0x00]
 
 v_cmp_le_i16 vcc, v1.h, v255.h
 // GFX11: v_cmp_le_i16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x33,0xd4,0x01,0xff,0x03,0x00]
@@ -1444,71 +1831,137 @@ v_cmp_le_u16 vcc, vcc_lo, v255.l
 v_cmp_le_u16 vcc, vcc_lo, v255.l
 // GFX11: v_cmp_le_u16_e64 vcc, vcc_lo, v255.l    ; encoding: [0x6a,0x00,0x3b,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_lg_f16 vcc, v1, v255
-// GFX11: v_cmp_lg_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x05,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_lg_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_lg_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x05,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_lg_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_lg_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x05,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_lg_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_lg_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_lg_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_lg_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_lg_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_lg_f16_e64 vcc, v1.l, v255.l      ; encoding: [0x6a,0x00,0x05,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_lg_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_lg_f16_e64 vcc, v1.l, v255.l      ; encoding: [0x6a,0x00,0x05,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_lg_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_lg_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_lg_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_lg_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_lg_f16 vcc, v1, v255
-// GFX11: v_cmp_lg_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x05,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_lg_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_lg_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x05,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_lg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lg_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_lg_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x05,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_lg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lg_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_lg_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lg_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_lg_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_lg_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lg_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_lg_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_lg_f16 vcc, v127, v255
-// GFX11: v_cmp_lg_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x05,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_lg_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_lg_f16 vcc, v127, v255
-// GFX11: v_cmp_lg_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x05,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_lg_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_lg_f16_e64 vcc, v127.l, v255.l    ; encoding: [0x6a,0x00,0x05,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_lg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lg_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_lg_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_lg_f16_e64 vcc, v127.l, v255.l    ; encoding: [0x6a,0x00,0x05,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_lg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lg_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_lg_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_lg_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lg_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_lg_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_lg_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lg_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_lg_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_lg_f16 vcc, v128, v2
-// GFX11: v_cmp_lg_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x05,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_lg_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_lg_f16 vcc, v128, v2
-// GFX11: v_cmp_lg_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x05,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_lg_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_lg_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x05,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_lg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lg_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_lg_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_lg_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x05,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_lg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lg_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_lg_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x05,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_lg_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lg_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_lg_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x05,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_lg_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lg_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_lg_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x05,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_lg_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_lg_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x05,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_lg_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x05,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_lg_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_lg_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x05,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_lg_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_lg_f16_e64 vcc, v128.l, v2.l      ; encoding: [0x6a,0x00,0x05,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_lg_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_lg_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x05,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_lg_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_lg_f16_e64 vcc, v128.l, v2.l      ; encoding: [0x6a,0x00,0x05,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_lg_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_lg_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x05,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_lg_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_lg_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_lg_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_lg_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_lg_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_lg_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x05,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_lg_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_lg_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x05,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_lg_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_lg_f16_e64 vcc, vcc_hi, v255.l    ; encoding: [0x6a,0x00,0x05,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_lg_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_lg_f16_e64 vcc, vcc_hi, v255.l    ; encoding: [0x6a,0x00,0x05,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_lg_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_lg_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x05,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_lg_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_lg_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x05,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_lg_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_lg_f16_e64 vcc, vcc_lo, v255.l    ; encoding: [0x6a,0x00,0x05,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_lg_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_lg_f16_e64 vcc, vcc_lo, v255.l    ; encoding: [0x6a,0x00,0x05,0xd4,0x6a,0xfe,0x03,0x00]
 
 v_cmp_lt_f16 vcc, v1.h, v255.h
 // GFX11: v_cmp_lt_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x01,0xd4,0x01,0xff,0x03,0x00]
@@ -2170,662 +2623,1322 @@ v_cmp_ne_u16 vcc, vcc_lo, v255.l
 v_cmp_ne_u16 vcc, vcc_lo, v255.l
 // GFX11: v_cmp_ne_u16_e64 vcc, vcc_lo, v255.l    ; encoding: [0x6a,0x00,0x3d,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_neq_f16 vcc, v1, v255
-// GFX11: v_cmp_neq_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0d,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_neq_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_neq_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0d,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_neq_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_neq_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0d,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_neq_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_neq_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_neq_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_neq_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_neq_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_neq_f16_e64 vcc, v1.l, v255.l     ; encoding: [0x6a,0x00,0x0d,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_neq_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_neq_f16_e64 vcc, v1.l, v255.l     ; encoding: [0x6a,0x00,0x0d,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_neq_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_neq_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_neq_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_neq_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_neq_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_neq_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0d,0xd4,0x7f,0xff,0x03,0x00]
+
+v_cmp_neq_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_neq_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0d,0xd4,0x7f,0xff,0x03,0x00]
+
+v_cmp_neq_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+
+v_cmp_neq_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+
+v_cmp_neq_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+
+v_cmp_neq_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+
+v_cmp_neq_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_neq_f16_e64 vcc, v127.l, v255.l   ; encoding: [0x6a,0x00,0x0d,0xd4,0x7f,0xff,0x03,0x00]
+
+v_cmp_neq_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_neq_f16_e64 vcc, v127.l, v255.l   ; encoding: [0x6a,0x00,0x0d,0xd4,0x7f,0xff,0x03,0x00]
+
+v_cmp_neq_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+
+v_cmp_neq_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+
+v_cmp_neq_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+
+v_cmp_neq_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+
+v_cmp_neq_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_neq_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0d,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_neq_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_neq_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0d,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_neq_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_neq_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_neq_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_neq_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_neq_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_neq_f16_e64 vcc, v128.l, v2.l     ; encoding: [0x6a,0x00,0x0d,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_neq_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_neq_f16_e64 vcc, v128.l, v2.l     ; encoding: [0x6a,0x00,0x0d,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_neq_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_neq_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_neq_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_neq_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_neq_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_neq_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0d,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_neq_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_neq_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0d,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_neq_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_neq_f16_e64 vcc, vcc_hi, v255.l   ; encoding: [0x6a,0x00,0x0d,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_neq_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_neq_f16_e64 vcc, vcc_hi, v255.l   ; encoding: [0x6a,0x00,0x0d,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_neq_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_neq_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0d,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_neq_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_neq_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0d,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_neq_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_neq_f16_e64 vcc, vcc_lo, v255.l   ; encoding: [0x6a,0x00,0x0d,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_neq_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_neq_f16_e64 vcc, vcc_lo, v255.l   ; encoding: [0x6a,0x00,0x0d,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_nge_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_nge_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x09,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_nge_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_nge_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x09,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_nge_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_nge_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_nge_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_nge_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_nge_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_nge_f16_e64 vcc, v1.l, v255.l     ; encoding: [0x6a,0x00,0x09,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_nge_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_nge_f16_e64 vcc, v1.l, v255.l     ; encoding: [0x6a,0x00,0x09,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_nge_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_nge_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_nge_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_nge_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_nge_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_nge_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x09,0xd4,0x7f,0xff,0x03,0x00]
+
+v_cmp_nge_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_nge_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x09,0xd4,0x7f,0xff,0x03,0x00]
+
+v_cmp_nge_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+
+v_cmp_nge_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+
+v_cmp_nge_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+
+v_cmp_nge_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+
+v_cmp_nge_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_nge_f16_e64 vcc, v127.l, v255.l   ; encoding: [0x6a,0x00,0x09,0xd4,0x7f,0xff,0x03,0x00]
+
+v_cmp_nge_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_nge_f16_e64 vcc, v127.l, v255.l   ; encoding: [0x6a,0x00,0x09,0xd4,0x7f,0xff,0x03,0x00]
+
+v_cmp_nge_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+
+v_cmp_nge_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+
+v_cmp_nge_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+
+v_cmp_nge_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+
+v_cmp_nge_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_nge_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x09,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_nge_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_nge_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x09,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_nge_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x09,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_nge_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x09,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_nge_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x09,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_nge_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x09,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_nge_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_nge_f16_e64 vcc, v128.l, v2.l     ; encoding: [0x6a,0x00,0x09,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_nge_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_nge_f16_e64 vcc, v128.l, v2.l     ; encoding: [0x6a,0x00,0x09,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_nge_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_nge_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_nge_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_nge_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_nge_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_nge_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x09,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_nge_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_nge_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x09,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_nge_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_nge_f16_e64 vcc, vcc_hi, v255.l   ; encoding: [0x6a,0x00,0x09,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_nge_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_nge_f16_e64 vcc, vcc_hi, v255.l   ; encoding: [0x6a,0x00,0x09,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_nge_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_nge_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x09,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_nge_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_nge_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x09,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_nge_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_nge_f16_e64 vcc, vcc_lo, v255.l   ; encoding: [0x6a,0x00,0x09,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_nge_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_nge_f16_e64 vcc, vcc_lo, v255.l   ; encoding: [0x6a,0x00,0x09,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_ngt_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_ngt_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0b,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_ngt_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_ngt_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0b,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_ngt_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_ngt_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_ngt_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_ngt_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_ngt_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_ngt_f16_e64 vcc, v1.l, v255.l     ; encoding: [0x6a,0x00,0x0b,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_ngt_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_ngt_f16_e64 vcc, v1.l, v255.l     ; encoding: [0x6a,0x00,0x0b,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_ngt_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_ngt_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_ngt_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_ngt_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_ngt_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_ngt_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0b,0xd4,0x7f,0xff,0x03,0x00]
+
+v_cmp_ngt_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_ngt_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0b,0xd4,0x7f,0xff,0x03,0x00]
+
+v_cmp_ngt_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+
+v_cmp_ngt_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+
+v_cmp_ngt_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+
+v_cmp_ngt_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+
+v_cmp_ngt_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_ngt_f16_e64 vcc, v127.l, v255.l   ; encoding: [0x6a,0x00,0x0b,0xd4,0x7f,0xff,0x03,0x00]
+
+v_cmp_ngt_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_ngt_f16_e64 vcc, v127.l, v255.l   ; encoding: [0x6a,0x00,0x0b,0xd4,0x7f,0xff,0x03,0x00]
+
+v_cmp_ngt_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+
+v_cmp_ngt_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+
+v_cmp_ngt_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+
+v_cmp_ngt_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+
+v_cmp_ngt_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_ngt_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0b,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_ngt_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_ngt_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0b,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_ngt_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_ngt_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_ngt_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_ngt_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_ngt_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_ngt_f16_e64 vcc, v128.l, v2.l     ; encoding: [0x6a,0x00,0x0b,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_ngt_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_ngt_f16_e64 vcc, v128.l, v2.l     ; encoding: [0x6a,0x00,0x0b,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_ngt_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_ngt_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_ngt_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_ngt_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_ngt_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_ngt_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0b,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_ngt_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_ngt_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0b,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_ngt_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_ngt_f16_e64 vcc, vcc_hi, v255.l   ; encoding: [0x6a,0x00,0x0b,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_ngt_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_ngt_f16_e64 vcc, vcc_hi, v255.l   ; encoding: [0x6a,0x00,0x0b,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_ngt_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_ngt_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0b,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_ngt_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_ngt_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0b,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_ngt_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_ngt_f16_e64 vcc, vcc_lo, v255.l   ; encoding: [0x6a,0x00,0x0b,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_ngt_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_ngt_f16_e64 vcc, vcc_lo, v255.l   ; encoding: [0x6a,0x00,0x0b,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_nle_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_nle_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0c,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_nle_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_nle_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0c,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_nle_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_nle_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_nle_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_nle_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_nle_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_nle_f16_e64 vcc, v1.l, v255.l     ; encoding: [0x6a,0x00,0x0c,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_nle_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_nle_f16_e64 vcc, v1.l, v255.l     ; encoding: [0x6a,0x00,0x0c,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_nle_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_nle_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_nle_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_nle_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_nle_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_nle_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0c,0xd4,0x7f,0xff,0x03,0x00]
+
+v_cmp_nle_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_nle_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0c,0xd4,0x7f,0xff,0x03,0x00]
+
+v_cmp_nle_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+
+v_cmp_nle_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+
+v_cmp_nle_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+
+v_cmp_nle_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+
+v_cmp_nle_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_nle_f16_e64 vcc, v127.l, v255.l   ; encoding: [0x6a,0x00,0x0c,0xd4,0x7f,0xff,0x03,0x00]
+
+v_cmp_nle_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_nle_f16_e64 vcc, v127.l, v255.l   ; encoding: [0x6a,0x00,0x0c,0xd4,0x7f,0xff,0x03,0x00]
+
+v_cmp_nle_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+
+v_cmp_nle_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+
+v_cmp_nle_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+
+v_cmp_nle_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+
+v_cmp_nle_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_nle_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0c,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_nle_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_nle_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0c,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_nle_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_nle_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_nle_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_nle_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_nle_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_nle_f16_e64 vcc, v128.l, v2.l     ; encoding: [0x6a,0x00,0x0c,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_nle_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_nle_f16_e64 vcc, v128.l, v2.l     ; encoding: [0x6a,0x00,0x0c,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_nle_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_nle_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_nle_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_nle_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_nle_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_nle_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0c,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_nle_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_nle_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0c,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_nle_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_nle_f16_e64 vcc, vcc_hi, v255.l   ; encoding: [0x6a,0x00,0x0c,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_nle_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_nle_f16_e64 vcc, vcc_hi, v255.l   ; encoding: [0x6a,0x00,0x0c,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_nle_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_nle_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0c,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_nle_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_nle_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0c,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_nle_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_nle_f16_e64 vcc, vcc_lo, v255.l   ; encoding: [0x6a,0x00,0x0c,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_nle_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_nle_f16_e64 vcc, vcc_lo, v255.l   ; encoding: [0x6a,0x00,0x0c,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_nlg_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_nlg_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0a,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_nlg_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_nlg_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0a,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_nlg_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_nlg_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_nlg_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_nlg_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_nlg_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_nlg_f16_e64 vcc, v1.l, v255.l     ; encoding: [0x6a,0x00,0x0a,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_nlg_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_nlg_f16_e64 vcc, v1.l, v255.l     ; encoding: [0x6a,0x00,0x0a,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmp_nlg_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_nlg_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_nlg_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_nlg_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmp_nlg_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_nlg_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0a,0xd4,0x7f,0xff,0x03,0x00]
+
+v_cmp_nlg_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_nlg_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0a,0xd4,0x7f,0xff,0x03,0x00]
+
+v_cmp_nlg_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+
+v_cmp_nlg_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+
+v_cmp_nlg_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+
+v_cmp_nlg_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+
+v_cmp_nlg_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_nlg_f16_e64 vcc, v127.l, v255.l   ; encoding: [0x6a,0x00,0x0a,0xd4,0x7f,0xff,0x03,0x00]
+
+v_cmp_nlg_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_nlg_f16_e64 vcc, v127.l, v255.l   ; encoding: [0x6a,0x00,0x0a,0xd4,0x7f,0xff,0x03,0x00]
+
+v_cmp_nlg_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+
+v_cmp_nlg_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+
+v_cmp_nlg_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+
+v_cmp_nlg_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+
+v_cmp_nlg_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_nlg_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0a,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_nlg_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_nlg_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0a,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_nlg_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_nlg_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_nlg_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_nlg_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_nlg_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_nlg_f16_e64 vcc, v128.l, v2.l     ; encoding: [0x6a,0x00,0x0a,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_nlg_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_nlg_f16_e64 vcc, v128.l, v2.l     ; encoding: [0x6a,0x00,0x0a,0xd4,0x80,0x05,0x02,0x00]
+
+v_cmp_nlg_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_nlg_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_nlg_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_nlg_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_nlg_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_nlg_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0a,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_nlg_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_nlg_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0a,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_nlg_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_nlg_f16_e64 vcc, vcc_hi, v255.l   ; encoding: [0x6a,0x00,0x0a,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_nlg_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_nlg_f16_e64 vcc, vcc_hi, v255.l   ; encoding: [0x6a,0x00,0x0a,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_nlg_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_nlg_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0a,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_nlg_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_nlg_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0a,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_nlg_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_nlg_f16_e64 vcc, vcc_lo, v255.l   ; encoding: [0x6a,0x00,0x0a,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_nlg_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_nlg_f16_e64 vcc, vcc_lo, v255.l   ; encoding: [0x6a,0x00,0x0a,0xd4,0x6a,0xfe,0x03,0x00]
+
+v_cmp_nlt_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_nlt_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0e,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_neq_f16 vcc, v1, v255
-// GFX11: v_cmp_neq_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0d,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_nlt_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_nlt_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0e,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_neq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_neq_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_neq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_neq_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_neq_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_neq_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nlt_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_neq_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_neq_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nlt_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_neq_f16 vcc, v127, v255
-// GFX11: v_cmp_neq_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0d,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_nlt_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_nlt_f16_e64 vcc, v1.l, v255.l     ; encoding: [0x6a,0x00,0x0e,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_neq_f16 vcc, v127, v255
-// GFX11: v_cmp_neq_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0d,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_nlt_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_nlt_f16_e64 vcc, v1.l, v255.l     ; encoding: [0x6a,0x00,0x0e,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_neq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_neq_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_nlt_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_neq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_neq_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_nlt_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_neq_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_neq_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_nlt_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_neq_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_neq_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_nlt_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_neq_f16 vcc, v128, v2
-// GFX11: v_cmp_neq_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0d,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_nlt_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_nlt_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0e,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_neq_f16 vcc, v128, v2
-// GFX11: v_cmp_neq_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0d,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_nlt_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_nlt_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0e,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_neq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_neq_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_nlt_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_neq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_neq_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_nlt_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_neq_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_neq_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_nlt_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_neq_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_neq_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_nlt_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_neq_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_neq_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0d,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_nlt_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_nlt_f16_e64 vcc, v127.l, v255.l   ; encoding: [0x6a,0x00,0x0e,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_neq_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_neq_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0d,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_nlt_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_nlt_f16_e64 vcc, v127.l, v255.l   ; encoding: [0x6a,0x00,0x0e,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_neq_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_neq_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0d,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_nlt_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_neq_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_neq_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0d,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_nlt_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_nge_f16 vcc, v1, v255
-// GFX11: v_cmp_nge_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x09,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_nlt_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_nge_f16 vcc, v1, v255
-// GFX11: v_cmp_nge_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x09,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_nlt_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_nge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nge_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_nlt_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0e,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_nge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nge_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_nlt_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0e,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_nge_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nge_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nlt_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_nge_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nge_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nlt_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_nge_f16 vcc, v127, v255
-// GFX11: v_cmp_nge_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x09,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_nlt_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_nge_f16 vcc, v127, v255
-// GFX11: v_cmp_nge_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x09,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_nlt_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_nge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nge_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_nlt_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_nlt_f16_e64 vcc, v128.l, v2.l     ; encoding: [0x6a,0x00,0x0e,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_nge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nge_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_nlt_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_nlt_f16_e64 vcc, v128.l, v2.l     ; encoding: [0x6a,0x00,0x0e,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_nge_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nge_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_nlt_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_nge_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nge_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_nlt_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_nge_f16 vcc, v128, v2
-// GFX11: v_cmp_nge_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x09,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_nlt_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_nge_f16 vcc, v128, v2
-// GFX11: v_cmp_nge_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x09,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_nlt_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_nge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nge_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_nlt_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_nlt_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0e,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_nge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nge_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_nlt_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_nlt_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0e,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_nge_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nge_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_nlt_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_nlt_f16_e64 vcc, vcc_hi, v255.l   ; encoding: [0x6a,0x00,0x0e,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_nge_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nge_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_nlt_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_nlt_f16_e64 vcc, vcc_hi, v255.l   ; encoding: [0x6a,0x00,0x0e,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_nge_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_nge_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x09,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_nlt_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_nlt_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0e,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_nge_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_nge_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x09,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_nlt_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_nlt_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0e,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_nge_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_nge_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x09,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_nlt_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_nlt_f16_e64 vcc, vcc_lo, v255.l   ; encoding: [0x6a,0x00,0x0e,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_nge_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_nge_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x09,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_nlt_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_nlt_f16_e64 vcc, vcc_lo, v255.l   ; encoding: [0x6a,0x00,0x0e,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_ngt_f16 vcc, v1, v255
-// GFX11: v_cmp_ngt_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0b,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_o_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_o_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x07,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_ngt_f16 vcc, v1, v255
-// GFX11: v_cmp_ngt_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0b,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_o_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_o_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x07,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_ngt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_o_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_ngt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_o_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_ngt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_o_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_ngt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_o_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_ngt_f16 vcc, v127, v255
-// GFX11: v_cmp_ngt_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0b,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_o_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_o_f16_e64 vcc, v1.l, v255.l       ; encoding: [0x6a,0x00,0x07,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_ngt_f16 vcc, v127, v255
-// GFX11: v_cmp_ngt_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0b,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_o_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_o_f16_e64 vcc, v1.l, v255.l       ; encoding: [0x6a,0x00,0x07,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_ngt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_o_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_ngt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_o_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_ngt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_o_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_ngt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_o_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_ngt_f16 vcc, v128, v2
-// GFX11: v_cmp_ngt_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0b,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_o_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_o_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x07,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_ngt_f16 vcc, v128, v2
-// GFX11: v_cmp_ngt_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0b,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_o_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_o_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x07,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_ngt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_o_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_ngt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_o_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_ngt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_o_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_ngt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_o_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_ngt_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_ngt_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0b,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_o_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_o_f16_e64 vcc, v127.l, v255.l     ; encoding: [0x6a,0x00,0x07,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_ngt_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_ngt_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0b,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_o_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_o_f16_e64 vcc, v127.l, v255.l     ; encoding: [0x6a,0x00,0x07,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_ngt_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_ngt_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0b,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_o_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_ngt_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_ngt_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0b,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_o_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_nle_f16 vcc, v1, v255
-// GFX11: v_cmp_nle_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0c,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_o_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_nle_f16 vcc, v1, v255
-// GFX11: v_cmp_nle_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0c,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_o_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_nle_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nle_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_o_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_o_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x07,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_nle_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nle_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_o_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_o_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x07,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_nle_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nle_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_o_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x07,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_nle_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nle_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_o_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x07,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_nle_f16 vcc, v127, v255
-// GFX11: v_cmp_nle_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0c,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_o_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x07,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_nle_f16 vcc, v127, v255
-// GFX11: v_cmp_nle_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0c,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_o_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x07,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_nle_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nle_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_o_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_o_f16_e64 vcc, v128.l, v2.l       ; encoding: [0x6a,0x00,0x07,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_nle_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nle_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_o_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_o_f16_e64 vcc, v128.l, v2.l       ; encoding: [0x6a,0x00,0x07,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_nle_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nle_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_o_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_nle_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nle_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_o_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_nle_f16 vcc, v128, v2
-// GFX11: v_cmp_nle_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0c,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_o_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_nle_f16 vcc, v128, v2
-// GFX11: v_cmp_nle_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0c,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_o_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_nle_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nle_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_o_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_o_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x07,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_nle_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nle_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_o_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_o_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x07,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_nle_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nle_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_o_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_o_f16_e64 vcc, vcc_hi, v255.l     ; encoding: [0x6a,0x00,0x07,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_nle_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nle_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_o_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_o_f16_e64 vcc, vcc_hi, v255.l     ; encoding: [0x6a,0x00,0x07,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_nle_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_nle_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0c,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_o_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_o_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x07,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_nle_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_nle_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0c,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_o_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_o_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x07,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_nle_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_nle_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0c,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_o_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_o_f16_e64 vcc, vcc_lo, v255.l     ; encoding: [0x6a,0x00,0x07,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_nle_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_nle_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0c,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_o_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_o_f16_e64 vcc, vcc_lo, v255.l     ; encoding: [0x6a,0x00,0x07,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_nlg_f16 vcc, v1, v255
-// GFX11: v_cmp_nlg_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0a,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_t_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_t_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0f,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_nlg_f16 vcc, v1, v255
-// GFX11: v_cmp_nlg_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0a,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_t_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_t_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0f,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_nlg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_t_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_nlg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_t_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_nlg_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_t_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_nlg_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_t_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_nlg_f16 vcc, v127, v255
-// GFX11: v_cmp_nlg_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0a,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_t_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_t_f16_e64 vcc, v1.l, v255.l       ; encoding: [0x6a,0x00,0x0f,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_nlg_f16 vcc, v127, v255
-// GFX11: v_cmp_nlg_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0a,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_t_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_t_f16_e64 vcc, v1.l, v255.l       ; encoding: [0x6a,0x00,0x0f,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_nlg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_t_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_nlg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_t_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_nlg_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_t_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_nlg_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_t_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_nlg_f16 vcc, v128, v2
-// GFX11: v_cmp_nlg_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0a,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_t_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_t_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0f,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_nlg_f16 vcc, v128, v2
-// GFX11: v_cmp_nlg_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0a,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_t_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_t_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0f,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_nlg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_t_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_nlg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_t_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_nlg_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_t_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_nlg_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_t_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_nlg_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_nlg_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0a,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_t_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_t_f16_e64 vcc, v127.l, v255.l     ; encoding: [0x6a,0x00,0x0f,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_nlg_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_nlg_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0a,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_t_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_t_f16_e64 vcc, v127.l, v255.l     ; encoding: [0x6a,0x00,0x0f,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_nlg_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_nlg_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0a,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_t_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_nlg_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_nlg_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0a,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_t_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_nlt_f16 vcc, v1, v255
-// GFX11: v_cmp_nlt_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0e,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_t_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_nlt_f16 vcc, v1, v255
-// GFX11: v_cmp_nlt_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0e,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_t_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_nlt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_t_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_t_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0f,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_nlt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_t_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_t_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0f,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_nlt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_t_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_nlt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_t_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_nlt_f16 vcc, v127, v255
-// GFX11: v_cmp_nlt_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0e,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_t_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_nlt_f16 vcc, v127, v255
-// GFX11: v_cmp_nlt_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0e,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_t_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_nlt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_t_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_t_f16_e64 vcc, v128.l, v2.l       ; encoding: [0x6a,0x00,0x0f,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_nlt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_t_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_t_f16_e64 vcc, v128.l, v2.l       ; encoding: [0x6a,0x00,0x0f,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_nlt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_t_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_nlt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_t_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_nlt_f16 vcc, v128, v2
-// GFX11: v_cmp_nlt_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0e,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_t_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_nlt_f16 vcc, v128, v2
-// GFX11: v_cmp_nlt_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0e,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_t_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_nlt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_t_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_t_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0f,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_nlt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_t_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_t_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0f,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_nlt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_t_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_t_f16_e64 vcc, vcc_hi, v255.l     ; encoding: [0x6a,0x00,0x0f,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_nlt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_t_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_t_f16_e64 vcc, vcc_hi, v255.l     ; encoding: [0x6a,0x00,0x0f,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_nlt_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_nlt_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0e,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_t_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_t_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0f,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_nlt_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_nlt_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0e,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_t_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_t_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0f,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_nlt_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_nlt_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0e,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_t_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_t_f16_e64 vcc, vcc_lo, v255.l     ; encoding: [0x6a,0x00,0x0f,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_nlt_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_nlt_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0e,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_t_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_t_f16_e64 vcc, vcc_lo, v255.l     ; encoding: [0x6a,0x00,0x0f,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_o_f16 vcc, v1, v255
-// GFX11: v_cmp_o_f16_e64 vcc, v1, v255           ; encoding: [0x6a,0x00,0x07,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_tru_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_t_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0f,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_o_f16 vcc, v1, v255
-// GFX11: v_cmp_o_f16_e64 vcc, v1, v255           ; encoding: [0x6a,0x00,0x07,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_tru_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_t_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0f,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_o_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_o_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_tru_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_o_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_o_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_tru_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_o_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_o_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_tru_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_o_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_o_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_tru_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_o_f16 vcc, v127, v255
-// GFX11: v_cmp_o_f16_e64 vcc, v127, v255         ; encoding: [0x6a,0x00,0x07,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_tru_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_t_f16_e64 vcc, v1.l, v255.l       ; encoding: [0x6a,0x00,0x0f,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_o_f16 vcc, v127, v255
-// GFX11: v_cmp_o_f16_e64 vcc, v127, v255         ; encoding: [0x6a,0x00,0x07,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_tru_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_t_f16_e64 vcc, v1.l, v255.l       ; encoding: [0x6a,0x00,0x0f,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_o_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_o_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_tru_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_o_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_o_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_tru_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_o_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_o_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_tru_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_o_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_o_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_tru_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_o_f16 vcc, v128, v2
-// GFX11: v_cmp_o_f16_e64 vcc, v128, v2           ; encoding: [0x6a,0x00,0x07,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_tru_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_t_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0f,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_o_f16 vcc, v128, v2
-// GFX11: v_cmp_o_f16_e64 vcc, v128, v2           ; encoding: [0x6a,0x00,0x07,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_tru_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_t_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0f,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_o_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_o_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_tru_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_o_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_o_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_tru_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_o_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_o_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_tru_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_o_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_o_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_tru_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_o_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_o_f16_e64 vcc, vcc_hi, v255       ; encoding: [0x6a,0x00,0x07,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_tru_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_t_f16_e64 vcc, v127.l, v255.l     ; encoding: [0x6a,0x00,0x0f,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_o_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_o_f16_e64 vcc, vcc_hi, v255       ; encoding: [0x6a,0x00,0x07,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_tru_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_t_f16_e64 vcc, v127.l, v255.l     ; encoding: [0x6a,0x00,0x0f,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_o_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_o_f16_e64 vcc, vcc_lo, v255       ; encoding: [0x6a,0x00,0x07,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_tru_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_o_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_o_f16_e64 vcc, vcc_lo, v255       ; encoding: [0x6a,0x00,0x07,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_tru_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_t_f16 vcc, v1, v255
-// GFX11: v_cmp_t_f16_e64 vcc, v1, v255           ; encoding: [0x6a,0x00,0x0f,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_tru_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_t_f16 vcc, v1, v255
-// GFX11: v_cmp_t_f16_e64 vcc, v1, v255           ; encoding: [0x6a,0x00,0x0f,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_tru_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_t_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_t_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_tru_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_t_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0f,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_t_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_t_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_tru_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_t_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0f,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_t_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_t_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_tru_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_t_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_t_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_tru_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_t_f16 vcc, v127, v255
-// GFX11: v_cmp_t_f16_e64 vcc, v127, v255         ; encoding: [0x6a,0x00,0x0f,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_tru_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_t_f16 vcc, v127, v255
-// GFX11: v_cmp_t_f16_e64 vcc, v127, v255         ; encoding: [0x6a,0x00,0x0f,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_tru_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_t_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_t_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_tru_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_t_f16_e64 vcc, v128.l, v2.l       ; encoding: [0x6a,0x00,0x0f,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_t_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_t_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_tru_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_t_f16_e64 vcc, v128.l, v2.l       ; encoding: [0x6a,0x00,0x0f,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_t_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_t_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_tru_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_t_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_t_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_tru_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_t_f16 vcc, v128, v2
-// GFX11: v_cmp_t_f16_e64 vcc, v128, v2           ; encoding: [0x6a,0x00,0x0f,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_tru_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_t_f16 vcc, v128, v2
-// GFX11: v_cmp_t_f16_e64 vcc, v128, v2           ; encoding: [0x6a,0x00,0x0f,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_tru_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_t_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_t_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_tru_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_t_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0f,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_t_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_t_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_tru_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_t_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0f,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_t_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_t_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_tru_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_t_f16_e64 vcc, vcc_hi, v255.l     ; encoding: [0x6a,0x00,0x0f,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_t_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_t_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_tru_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_t_f16_e64 vcc, vcc_hi, v255.l     ; encoding: [0x6a,0x00,0x0f,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_t_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_t_f16_e64 vcc, vcc_hi, v255       ; encoding: [0x6a,0x00,0x0f,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_tru_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_t_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0f,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_t_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_t_f16_e64 vcc, vcc_hi, v255       ; encoding: [0x6a,0x00,0x0f,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_tru_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_t_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0f,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_t_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_t_f16_e64 vcc, vcc_lo, v255       ; encoding: [0x6a,0x00,0x0f,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_tru_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_t_f16_e64 vcc, vcc_lo, v255.l     ; encoding: [0x6a,0x00,0x0f,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_t_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_t_f16_e64 vcc, vcc_lo, v255       ; encoding: [0x6a,0x00,0x0f,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_tru_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_t_f16_e64 vcc, vcc_lo, v255.l     ; encoding: [0x6a,0x00,0x0f,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_tru_f16 vcc, v1, v255
-// GFX11: v_cmp_t_f16_e64 vcc, v1, v255           ; encoding: [0x6a,0x00,0x0f,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_u_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_u_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x08,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_tru_f16 vcc, v1, v255
-// GFX11: v_cmp_t_f16_e64 vcc, v1, v255           ; encoding: [0x6a,0x00,0x0f,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_u_f16 vcc, v1.h, v255.h
+// GFX11: v_cmp_u_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x08,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_tru_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_t_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_u_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_tru_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_t_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_u_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_tru_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_t_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_u_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_tru_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_t_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_u_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_tru_f16 vcc, v127, v255
-// GFX11: v_cmp_t_f16_e64 vcc, v127, v255         ; encoding: [0x6a,0x00,0x0f,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_u_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_u_f16_e64 vcc, v1.l, v255.l       ; encoding: [0x6a,0x00,0x08,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_tru_f16 vcc, v127, v255
-// GFX11: v_cmp_t_f16_e64 vcc, v127, v255         ; encoding: [0x6a,0x00,0x0f,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_u_f16 vcc, v1.l, v255.l
+// GFX11: v_cmp_u_f16_e64 vcc, v1.l, v255.l       ; encoding: [0x6a,0x00,0x08,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_tru_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_t_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_u_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_tru_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_t_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_u_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_tru_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_t_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_u_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_tru_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_t_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_u_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_tru_f16 vcc, v128, v2
-// GFX11: v_cmp_t_f16_e64 vcc, v128, v2           ; encoding: [0x6a,0x00,0x0f,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_u_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_u_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x08,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_tru_f16 vcc, v128, v2
-// GFX11: v_cmp_t_f16_e64 vcc, v128, v2           ; encoding: [0x6a,0x00,0x0f,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_u_f16 vcc, v127.h, v255.h
+// GFX11: v_cmp_u_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x08,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_tru_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_t_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_u_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_tru_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_t_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_u_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_tru_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_t_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_u_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_tru_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_t_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_u_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_tru_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_t_f16_e64 vcc, vcc_hi, v255       ; encoding: [0x6a,0x00,0x0f,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_u_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_u_f16_e64 vcc, v127.l, v255.l     ; encoding: [0x6a,0x00,0x08,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_tru_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_t_f16_e64 vcc, vcc_hi, v255       ; encoding: [0x6a,0x00,0x0f,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_u_f16 vcc, v127.l, v255.l
+// GFX11: v_cmp_u_f16_e64 vcc, v127.l, v255.l     ; encoding: [0x6a,0x00,0x08,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_tru_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_t_f16_e64 vcc, vcc_lo, v255       ; encoding: [0x6a,0x00,0x0f,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_u_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_tru_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_t_f16_e64 vcc, vcc_lo, v255       ; encoding: [0x6a,0x00,0x0f,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_u_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_u_f16 vcc, v1, v255
-// GFX11: v_cmp_u_f16_e64 vcc, v1, v255           ; encoding: [0x6a,0x00,0x08,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_u_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_u_f16 vcc, v1, v255
-// GFX11: v_cmp_u_f16_e64 vcc, v1, v255           ; encoding: [0x6a,0x00,0x08,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_u_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_u_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_u_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_u_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_u_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x08,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_u_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_u_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_u_f16 vcc, v128.h, v2.h
+// GFX11: v_cmp_u_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x08,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_u_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_u_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_u_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x08,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_u_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_u_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_u_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x08,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_u_f16 vcc, v127, v255
-// GFX11: v_cmp_u_f16_e64 vcc, v127, v255         ; encoding: [0x6a,0x00,0x08,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_u_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x08,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_u_f16 vcc, v127, v255
-// GFX11: v_cmp_u_f16_e64 vcc, v127, v255         ; encoding: [0x6a,0x00,0x08,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_u_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x08,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_u_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_u_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_u_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_u_f16_e64 vcc, v128.l, v2.l       ; encoding: [0x6a,0x00,0x08,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_u_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_u_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_u_f16 vcc, v128.l, v2.l
+// GFX11: v_cmp_u_f16_e64 vcc, v128.l, v2.l       ; encoding: [0x6a,0x00,0x08,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_u_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_u_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_u_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_u_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_u_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_u_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_u_f16 vcc, v128, v2
-// GFX11: v_cmp_u_f16_e64 vcc, v128, v2           ; encoding: [0x6a,0x00,0x08,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_u_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_u_f16 vcc, v128, v2
-// GFX11: v_cmp_u_f16_e64 vcc, v128, v2           ; encoding: [0x6a,0x00,0x08,0xd4,0x80,0x05,0x02,0x00]
+v_cmp_u_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_u_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_u_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_u_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_u_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x08,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_u_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_u_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_u_f16 vcc, vcc_hi, v255.h
+// GFX11: v_cmp_u_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x08,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_u_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_u_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_u_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_u_f16_e64 vcc, vcc_hi, v255.l     ; encoding: [0x6a,0x00,0x08,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_u_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_u_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_u_f16 vcc, vcc_hi, v255.l
+// GFX11: v_cmp_u_f16_e64 vcc, vcc_hi, v255.l     ; encoding: [0x6a,0x00,0x08,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_u_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_u_f16_e64 vcc, vcc_hi, v255       ; encoding: [0x6a,0x00,0x08,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_u_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_u_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x08,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_u_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_u_f16_e64 vcc, vcc_hi, v255       ; encoding: [0x6a,0x00,0x08,0xd4,0x6b,0xfe,0x03,0x00]
+v_cmp_u_f16 vcc, vcc_lo, v255.h
+// GFX11: v_cmp_u_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x08,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_u_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_u_f16_e64 vcc, vcc_lo, v255       ; encoding: [0x6a,0x00,0x08,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_u_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_u_f16_e64 vcc, vcc_lo, v255.l     ; encoding: [0x6a,0x00,0x08,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_u_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_u_f16_e64 vcc, vcc_lo, v255       ; encoding: [0x6a,0x00,0x08,0xd4,0x6a,0xfe,0x03,0x00]
+v_cmp_u_f16 vcc, vcc_lo, v255.l
+// GFX11: v_cmp_u_f16_e64 vcc, vcc_lo, v255.l     ; encoding: [0x6a,0x00,0x08,0xd4,0x6a,0xfe,0x03,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c.s
index 58cba4cd43a1c..d73ffb6fd2f50 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c.s
@@ -4,20 +4,20 @@
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
 
-v_cmp_class_f16_e64 s5, v1, v2
-// W32: v_cmp_class_f16_e64 s5, v1, v2          ; encoding: [0x05,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_class_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_class_f16_e64 s5, v1.l, v2.l      ; encoding: [0x05,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_class_f16_e64 s5, v255, v2
-// W32: v_cmp_class_f16_e64 s5, v255, v2        ; encoding: [0x05,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00]
+v_cmp_class_f16_e64 s5, v255.l, v2.l
+// W32: v_cmp_class_f16_e64 s5, v255.l, v2.l    ; encoding: [0x05,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_class_f16_e64 s5, s1, v2
-// W32: v_cmp_class_f16_e64 s5, s1, v2          ; encoding: [0x05,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00]
+v_cmp_class_f16_e64 s5, s1, v2.l
+// W32: v_cmp_class_f16_e64 s5, s1, v2.l        ; encoding: [0x05,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_class_f16_e64 s5, s105, v255
-// W32: v_cmp_class_f16_e64 s5, s105, v255      ; encoding: [0x05,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+v_cmp_class_f16_e64 s5, s105, v255.l
+// W32: v_cmp_class_f16_e64 s5, s105, v255.l    ; encoding: [0x05,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
 v_cmp_class_f16_e64 s5, vcc_lo, s2
@@ -60,21 +60,21 @@ v_cmp_class_f16_e64 ttmp15, src_scc, vcc_lo
 // W32: v_cmp_class_f16_e64 ttmp15, src_scc, vcc_lo ; encoding: [0x7b,0x00,0x7d,0xd4,0xfd,0xd4,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_class_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_class_f16_e64 s[10:11], v1, v2    ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_class_f16_e64 s[10:11], v1.l, v2.l
+// W64: v_cmp_class_f16_e64 s[10:11], v1.l, v2.l ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_class_f16_e64 s[10:11], v255, v2
-// W64: v_cmp_class_f16_e64 s[10:11], v255, v2  ; encoding: [0x0a,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00]
-// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+v_cmp_class_f16_e64 s10, v255.l, v2.l
+// W32: v_cmp_class_f16_e64 s10, v255.l, v2.l   ; encoding: [0x0a,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_class_f16_e64 s[10:11], s1, v2
-// W64: v_cmp_class_f16_e64 s[10:11], s1, v2    ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00]
+v_cmp_class_f16_e64 s[10:11], s1, v2.l
+// W64: v_cmp_class_f16_e64 s[10:11], s1, v2.l  ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_class_f16_e64 s[10:11], s105, v255
-// W64: v_cmp_class_f16_e64 s[10:11], s105, v255 ; encoding: [0x0a,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+v_cmp_class_f16_e64 s10, s105, v255.l
+// W32: v_cmp_class_f16_e64 s10, s105, v255.l   ; encoding: [0x0a,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
 v_cmp_class_f16_e64 s[10:11], vcc_lo, s2
 // W64: v_cmp_class_f16_e64 s[10:11], vcc_lo, s2 ; encoding: [0x0a,0x00,0x7d,0xd4,0x6a,0x04,0x00,0x00]
@@ -119,6 +119,26 @@ v_cmp_class_f16_e64 ttmp[14:15], src_scc, vcc_lo
 v_cmp_class_f16_e64 null, -|0xfe0b|, vcc_hi
 // GFX12: v_cmp_class_f16_e64 null, -|0xfe0b|, vcc_hi ; encoding: [0x7c,0x01,0x7d,0xd4,0xff,0xd6,0x00,0x20,0x0b,0xfe,0x00,0x00]
 
+v_cmp_class_f16_e64 s5, v255.h, v2.l
+// W32: v_cmp_class_f16_e64 s5, v255.h, v2.l    ; encoding: [0x05,0x08,0x7d,0xd4,0xff,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s5, s105, v255.h
+// W32: v_cmp_class_f16_e64 s5, s105, v255.h    ; encoding: [0x05,0x10,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s[10:11], v255.h, v2.l
+// W64: v_cmp_class_f16_e64 s[10:11], v255.h, v2.l ; encoding: [0x0a,0x08,0x7d,0xd4,0xff,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s[10:11], s105, v255.h
+// W64: v_cmp_class_f16_e64 s[10:11], s105, v255.h ; encoding: [0x0a,0x10,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 vcc_lo, 0.5, m0
+// W32: v_cmp_class_f16_e64 vcc_lo, 0.5, m0     ; encoding: [0x6a,0x00,0x7d,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
 v_cmp_class_f32_e64 s5, v1, v2
 // W32: v_cmp_class_f32_e64 s5, v1, v2          ; encoding: [0x05,0x00,0x7e,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
@@ -349,12 +369,12 @@ v_cmp_class_f64_e64 ttmp[14:15], -|src_scc|, src_scc
 v_cmp_class_f64_e64 null, 0xaf123456, 0xaf123456
 // GFX12: v_cmp_class_f64_e64 null, 0xaf123456, 0xaf123456 ; encoding: [0x7c,0x00,0x7f,0xd4,0xff,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
 
-v_cmp_eq_f16_e64 s5, v1, v2
-// W32: v_cmp_eq_f16_e64 s5, v1, v2             ; encoding: [0x05,0x00,0x02,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_eq_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_eq_f16_e64 s5, v1.l, v2.l         ; encoding: [0x05,0x00,0x02,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64 s5, v255, v255
-// W32: v_cmp_eq_f16_e64 s5, v255, v255         ; encoding: [0x05,0x00,0x02,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_eq_f16_e64 s5, v255.l, v255.l
+// W32: v_cmp_eq_f16_e64 s5, v255.l, v255.l     ; encoding: [0x05,0x00,0x02,0xd4,0xff,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
 v_cmp_eq_f16_e64 s5, s1, s2
@@ -405,13 +425,13 @@ v_cmp_eq_f16_e64 ttmp15, -src_scc, |vcc_lo|
 // W32: v_cmp_eq_f16_e64 ttmp15, -src_scc, |vcc_lo| ; encoding: [0x7b,0x02,0x02,0xd4,0xfd,0xd4,0x00,0x20]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_eq_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00]
-// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+v_cmp_eq_f16_e64 s10, v1.l, v2.l
+// W32: v_cmp_eq_f16_e64 s10, v1.l, v2.l        ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64 s[10:11], v255, v255
-// W64: v_cmp_eq_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x02,0xd4,0xff,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+v_cmp_eq_f16_e64 s10, v255.l, v255.l
+// W32: v_cmp_eq_f16_e64 s10, v255.l, v255.l    ; encoding: [0x0a,0x00,0x02,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
 v_cmp_eq_f16_e64 s[10:11], s1, s2
 // W64: v_cmp_eq_f16_e64 s[10:11], s1, s2       ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x04,0x00,0x00]
@@ -464,6 +484,26 @@ v_cmp_eq_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
 v_cmp_eq_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
 // GFX12: v_cmp_eq_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x02,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+v_cmp_eq_f16_e64 s5, v1.h, v2.l
+// W32: v_cmp_eq_f16_e64 s5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x02,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s5, v255.l, v255.h
+// W32: v_cmp_eq_f16_e64 s5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x02,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s[10:11], v1.h, v2.l
+// W64: v_cmp_eq_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x02,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s[10:11], v255.l, v255.h
+// W64: v_cmp_eq_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x02,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 vcc_lo, 0.5, -m0
+// W32: v_cmp_eq_f16_e64 vcc_lo, 0.5, -m0       ; encoding: [0x6a,0x00,0x02,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
 v_cmp_eq_f32_e64 s5, v1, v2
 // W32: v_cmp_eq_f32_e64 s5, v1, v2             ; encoding: [0x05,0x00,0x12,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
@@ -1360,12 +1400,12 @@ v_cmp_eq_u64_e64 ttmp[14:15], src_scc, exec
 v_cmp_eq_u64_e64 null, 0xaf123456, vcc
 // GFX12: v_cmp_eq_u64_e64 null, 0xaf123456, vcc  ; encoding: [0x7c,0x00,0x5a,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
 
-v_cmp_ge_f16_e64 s5, v1, v2
-// W32: v_cmp_ge_f16_e64 s5, v1, v2             ; encoding: [0x05,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_ge_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_ge_f16_e64 s5, v1.l, v2.l         ; encoding: [0x05,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64 s5, v255, v255
-// W32: v_cmp_ge_f16_e64 s5, v255, v255         ; encoding: [0x05,0x00,0x06,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_ge_f16_e64 s5, v255.l, v255.l
+// W32: v_cmp_ge_f16_e64 s5, v255.l, v255.l     ; encoding: [0x05,0x00,0x06,0xd4,0xff,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
 v_cmp_ge_f16_e64 s5, s1, s2
@@ -1416,13 +1456,13 @@ v_cmp_ge_f16_e64 ttmp15, -src_scc, |vcc_lo|
 // W32: v_cmp_ge_f16_e64 ttmp15, -src_scc, |vcc_lo| ; encoding: [0x7b,0x02,0x06,0xd4,0xfd,0xd4,0x00,0x20]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_ge_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]
-// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+v_cmp_ge_f16_e64 s10, v1.l, v2.l
+// W32: v_cmp_ge_f16_e64 s10, v1.l, v2.l        ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64 s[10:11], v255, v255
-// W64: v_cmp_ge_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x06,0xd4,0xff,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+v_cmp_ge_f16_e64 s10, v255.l, v255.l
+// W32: v_cmp_ge_f16_e64 s10, v255.l, v255.l    ; encoding: [0x0a,0x00,0x06,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
 v_cmp_ge_f16_e64 s[10:11], s1, s2
 // W64: v_cmp_ge_f16_e64 s[10:11], s1, s2       ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x04,0x00,0x00]
@@ -1475,6 +1515,26 @@ v_cmp_ge_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
 v_cmp_ge_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
 // GFX12: v_cmp_ge_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x06,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+v_cmp_ge_f16_e64 s5, v1.h, v2.l
+// W32: v_cmp_ge_f16_e64 s5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x06,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s5, v255.l, v255.h
+// W32: v_cmp_ge_f16_e64 s5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x06,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s[10:11], v1.h, v2.l
+// W64: v_cmp_ge_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x06,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s[10:11], v255.l, v255.h
+// W64: v_cmp_ge_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x06,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 vcc_lo, 0.5, -m0
+// W32: v_cmp_ge_f16_e64 vcc_lo, 0.5, -m0       ; encoding: [0x6a,0x00,0x06,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
 v_cmp_ge_f32_e64 s5, v1, v2
 // W32: v_cmp_ge_f32_e64 s5, v1, v2             ; encoding: [0x05,0x00,0x16,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
@@ -2371,12 +2431,12 @@ v_cmp_ge_u64_e64 ttmp[14:15], src_scc, exec
 v_cmp_ge_u64_e64 null, 0xaf123456, vcc
 // GFX12: v_cmp_ge_u64_e64 null, 0xaf123456, vcc  ; encoding: [0x7c,0x00,0x5e,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
 
-v_cmp_gt_f16_e64 s5, v1, v2
-// W32: v_cmp_gt_f16_e64 s5, v1, v2             ; encoding: [0x05,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_gt_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_gt_f16_e64 s5, v1.l, v2.l         ; encoding: [0x05,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64 s5, v255, v255
-// W32: v_cmp_gt_f16_e64 s5, v255, v255         ; encoding: [0x05,0x00,0x04,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_gt_f16_e64 s5, v255.l, v255.l
+// W32: v_cmp_gt_f16_e64 s5, v255.l, v255.l     ; encoding: [0x05,0x00,0x04,0xd4,0xff,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
 v_cmp_gt_f16_e64 s5, s1, s2
@@ -2427,13 +2487,13 @@ v_cmp_gt_f16_e64 ttmp15, -src_scc, |vcc_lo|
 // W32: v_cmp_gt_f16_e64 ttmp15, -src_scc, |vcc_lo| ; encoding: [0x7b,0x02,0x04,0xd4,0xfd,0xd4,0x00,0x20]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_gt_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]
-// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+v_cmp_gt_f16_e64 s10, v1.l, v2.l
+// W32: v_cmp_gt_f16_e64 s10, v1.l, v2.l        ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64 s[10:11], v255, v255
-// W64: v_cmp_gt_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x04,0xd4,0xff,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+v_cmp_gt_f16_e64 s10, v255.l, v255.l
+// W32: v_cmp_gt_f16_e64 s10, v255.l, v255.l    ; encoding: [0x0a,0x00,0x04,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
 v_cmp_gt_f16_e64 s[10:11], s1, s2
 // W64: v_cmp_gt_f16_e64 s[10:11], s1, s2       ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x04,0x00,0x00]
@@ -2486,6 +2546,26 @@ v_cmp_gt_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
 v_cmp_gt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
 // GFX12: v_cmp_gt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x04,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+v_cmp_gt_f16_e64 s5, v1.h, v2.l
+// W32: v_cmp_gt_f16_e64 s5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s5, v255.l, v255.h
+// W32: v_cmp_gt_f16_e64 s5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s[10:11], v1.h, v2.l
+// W64: v_cmp_gt_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x04,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s[10:11], v255.l, v255.h
+// W64: v_cmp_gt_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x04,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 vcc_lo, 0.5, -m0
+// W32: v_cmp_gt_f16_e64 vcc_lo, 0.5, -m0       ; encoding: [0x6a,0x00,0x04,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
 v_cmp_gt_f32_e64 s5, v1, v2
 // W32: v_cmp_gt_f32_e64 s5, v1, v2             ; encoding: [0x05,0x00,0x14,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
@@ -3382,12 +3462,12 @@ v_cmp_gt_u64_e64 ttmp[14:15], src_scc, exec
 v_cmp_gt_u64_e64 null, 0xaf123456, vcc
 // GFX12: v_cmp_gt_u64_e64 null, 0xaf123456, vcc  ; encoding: [0x7c,0x00,0x5c,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
 
-v_cmp_le_f16_e64 s5, v1, v2
-// W32: v_cmp_le_f16_e64 s5, v1, v2             ; encoding: [0x05,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_le_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_le_f16_e64 s5, v1.l, v2.l         ; encoding: [0x05,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_le_f16_e64 s5, v255, v255
-// W32: v_cmp_le_f16_e64 s5, v255, v255         ; encoding: [0x05,0x00,0x03,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_le_f16_e64 s5, v255.l, v255.l
+// W32: v_cmp_le_f16_e64 s5, v255.l, v255.l     ; encoding: [0x05,0x00,0x03,0xd4,0xff,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
 v_cmp_le_f16_e64 s5, s1, s2
@@ -3438,13 +3518,13 @@ v_cmp_le_f16_e64 ttmp15, -src_scc, |vcc_lo|
 // W32: v_cmp_le_f16_e64 ttmp15, -src_scc, |vcc_lo| ; encoding: [0x7b,0x02,0x03,0xd4,0xfd,0xd4,0x00,0x20]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_le_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_le_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]
-// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+v_cmp_le_f16_e64 s10, v1.l, v2.l
+// W32: v_cmp_le_f16_e64 s10, v1.l, v2.l        ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_le_f16_e64 s[10:11], v255, v255
-// W64: v_cmp_le_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x03,0xd4,0xff,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+v_cmp_le_f16_e64 s10, v255.l, v255.l
+// W32: v_cmp_le_f16_e64 s10, v255.l, v255.l    ; encoding: [0x0a,0x00,0x03,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
 v_cmp_le_f16_e64 s[10:11], s1, s2
 // W64: v_cmp_le_f16_e64 s[10:11], s1, s2       ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x04,0x00,0x00]
@@ -3497,6 +3577,26 @@ v_cmp_le_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
 v_cmp_le_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
 // GFX12: v_cmp_le_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x03,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+v_cmp_le_f16_e64 s5, v1.h, v2.l
+// W32: v_cmp_le_f16_e64 s5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s5, v255.l, v255.h
+// W32: v_cmp_le_f16_e64 s5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s[10:11], v1.h, v2.l
+// W64: v_cmp_le_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x03,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s[10:11], v255.l, v255.h
+// W64: v_cmp_le_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x03,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 vcc_lo, 0.5, -m0
+// W32: v_cmp_le_f16_e64 vcc_lo, 0.5, -m0       ; encoding: [0x6a,0x00,0x03,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
 v_cmp_le_f32_e64 s5, v1, v2
 // W32: v_cmp_le_f32_e64 s5, v1, v2             ; encoding: [0x05,0x00,0x13,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
@@ -4393,12 +4493,12 @@ v_cmp_le_u64_e64 ttmp[14:15], src_scc, exec
 v_cmp_le_u64_e64 null, 0xaf123456, vcc
 // GFX12: v_cmp_le_u64_e64 null, 0xaf123456, vcc  ; encoding: [0x7c,0x00,0x5b,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
 
-v_cmp_lg_f16_e64 s5, v1, v2
-// W32: v_cmp_lg_f16_e64 s5, v1, v2             ; encoding: [0x05,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_lg_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_lg_f16_e64 s5, v1.l, v2.l         ; encoding: [0x05,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64 s5, v255, v255
-// W32: v_cmp_lg_f16_e64 s5, v255, v255         ; encoding: [0x05,0x00,0x05,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_lg_f16_e64 s5, v255.l, v255.l
+// W32: v_cmp_lg_f16_e64 s5, v255.l, v255.l     ; encoding: [0x05,0x00,0x05,0xd4,0xff,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
 v_cmp_lg_f16_e64 s5, s1, s2
@@ -4449,13 +4549,13 @@ v_cmp_lg_f16_e64 ttmp15, -src_scc, |vcc_lo|
 // W32: v_cmp_lg_f16_e64 ttmp15, -src_scc, |vcc_lo| ; encoding: [0x7b,0x02,0x05,0xd4,0xfd,0xd4,0x00,0x20]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_lg_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]
-// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+v_cmp_lg_f16_e64 s10, v1.l, v2.l
+// W32: v_cmp_lg_f16_e64 s10, v1.l, v2.l        ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64 s[10:11], v255, v255
-// W64: v_cmp_lg_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x05,0xd4,0xff,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+v_cmp_lg_f16_e64 s10, v255.l, v255.l
+// W32: v_cmp_lg_f16_e64 s10, v255.l, v255.l    ; encoding: [0x0a,0x00,0x05,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
 
 v_cmp_lg_f16_e64 s[10:11], s1, s2
 // W64: v_cmp_lg_f16_e64 s[10:11], s1, s2       ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x04,0x00,0x00]
@@ -4508,6 +4608,26 @@ v_cmp_lg_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
 v_cmp_lg_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
 // GFX12: v_cmp_lg_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x05,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+v_cmp_lg_f16_e64 s5, v1.h, v2.l
+// W32: v_cmp_lg_f16_e64 s5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x05,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s5, v255.l, v255.h
+// W32: v_cmp_lg_f16_e64 s5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x05,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s[10:11], v1.h, v2.l
+// W64: v_cmp_lg_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x05,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s[10:11], v255.l, v255.h
+// W64: v_cmp_lg_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x05,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 vcc_lo, 0.5, -m0
+// W32: v_cmp_lg_f16_e64 vcc_lo, 0.5, -m0       ; encoding: [0x6a,0x00,0x05,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
+
 v_cmp_lg_f32_e64 s5, v1, v2
 // W32: v_cmp_lg_f32_e64 s5, v1, v2             ; encoding: [0x05,0x00,0x15,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:18: error: invalid operand for instruction
@@ -6435,12 +6555,12 @@ v_cmp_ne_u64_e64 ttmp[14:15], src_scc, exec
 v_cmp_ne_u64_e64 null, 0xaf123456, vcc
 // GFX12: v_cmp_ne_u64_e64 null, 0xaf123456, vcc  ; encoding: [0x7c,0x00,0x5d,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
 
-v_cmp_neq_f16_e64 s5, v1, v2
-// W32: v_cmp_neq_f16_e64 s5, v1, v2            ; encoding: [0x05,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_neq_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_neq_f16_e64 s5, v1.l, v2.l        ; encoding: [0x05,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64 s5, v255, v255
-// W32: v_cmp_neq_f16_e64 s5, v255, v255        ; encoding: [0x05,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_neq_f16_e64 s5, v255.l, v255.l
+// W32: v_cmp_neq_f16_e64 s5, v255.l, v255.l    ; encoding: [0x05,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
 v_cmp_neq_f16_e64 s5, s1, s2
@@ -6491,13 +6611,13 @@ v_cmp_neq_f16_e64 ttmp15, -src_scc, |vcc_lo|
 // W32: v_cmp_neq_f16_e64 ttmp15, -src_scc, |vcc_lo| ; encoding: [0x7b,0x02,0x0d,0xd4,0xfd,0xd4,0x00,0x20]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_neq_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cmp_neq_f16_e64 s10, v1.l, v2.l
+// W32: v_cmp_neq_f16_e64 s10, v1.l, v2.l       ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64 s[10:11], v255, v255
-// W64: v_cmp_neq_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cmp_neq_f16_e64 s10, v255.l, v255.l
+// W32: v_cmp_neq_f16_e64 s10, v255.l, v255.l   ; encoding: [0x0a,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
 v_cmp_neq_f16_e64 s[10:11], s1, s2
 // W64: v_cmp_neq_f16_e64 s[10:11], s1, s2      ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x04,0x00,0x00]
@@ -6550,6 +6670,26 @@ v_cmp_neq_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
 v_cmp_neq_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
 // GFX12: v_cmp_neq_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x0d,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+v_cmp_neq_f16_e64 s5, v1.h, v2.l
+// W32: v_cmp_neq_f16_e64 s5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s5, v255.l, v255.h
+// W32: v_cmp_neq_f16_e64 s5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s[10:11], v1.h, v2.l
+// W64: v_cmp_neq_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0d,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s[10:11], v255.l, v255.h
+// W64: v_cmp_neq_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0d,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 vcc_lo, 0.5, -m0
+// W32: v_cmp_neq_f16_e64 vcc_lo, 0.5, -m0      ; encoding: [0x6a,0x00,0x0d,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
 v_cmp_neq_f32_e64 s5, v1, v2
 // W32: v_cmp_neq_f32_e64 s5, v1, v2            ; encoding: [0x05,0x00,0x1d,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
@@ -6756,12 +6896,12 @@ v_cmp_neq_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
 v_cmp_neq_f64_e64 null, 0xaf123456, -|vcc| clamp
 // GFX12: v_cmp_neq_f64_e64 null, 0xaf123456, -|vcc| clamp ; encoding: [0x7c,0x82,0x2d,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
 
-v_cmp_nge_f16_e64 s5, v1, v2
-// W32: v_cmp_nge_f16_e64 s5, v1, v2            ; encoding: [0x05,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_nge_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_nge_f16_e64 s5, v1.l, v2.l        ; encoding: [0x05,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64 s5, v255, v255
-// W32: v_cmp_nge_f16_e64 s5, v255, v255        ; encoding: [0x05,0x00,0x09,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_nge_f16_e64 s5, v255.l, v255.l
+// W32: v_cmp_nge_f16_e64 s5, v255.l, v255.l    ; encoding: [0x05,0x00,0x09,0xd4,0xff,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
 v_cmp_nge_f16_e64 s5, s1, s2
@@ -6812,13 +6952,13 @@ v_cmp_nge_f16_e64 ttmp15, -src_scc, |vcc_lo|
 // W32: v_cmp_nge_f16_e64 ttmp15, -src_scc, |vcc_lo| ; encoding: [0x7b,0x02,0x09,0xd4,0xfd,0xd4,0x00,0x20]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_nge_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cmp_nge_f16_e64 s10, v1.l, v2.l
+// W32: v_cmp_nge_f16_e64 s10, v1.l, v2.l       ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64 s[10:11], v255, v255
-// W64: v_cmp_nge_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x09,0xd4,0xff,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cmp_nge_f16_e64 s10, v255.l, v255.l
+// W32: v_cmp_nge_f16_e64 s10, v255.l, v255.l   ; encoding: [0x0a,0x00,0x09,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
 v_cmp_nge_f16_e64 s[10:11], s1, s2
 // W64: v_cmp_nge_f16_e64 s[10:11], s1, s2      ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x04,0x00,0x00]
@@ -6871,6 +7011,26 @@ v_cmp_nge_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
 v_cmp_nge_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
 // GFX12: v_cmp_nge_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x09,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+v_cmp_nge_f16_e64 s5, v1.h, v2.l
+// W32: v_cmp_nge_f16_e64 s5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x09,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s5, v255.l, v255.h
+// W32: v_cmp_nge_f16_e64 s5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x09,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s[10:11], v1.h, v2.l
+// W64: v_cmp_nge_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x09,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s[10:11], v255.l, v255.h
+// W64: v_cmp_nge_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x09,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 vcc_lo, 0.5, -m0
+// W32: v_cmp_nge_f16_e64 vcc_lo, 0.5, -m0      ; encoding: [0x6a,0x00,0x09,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
 v_cmp_nge_f32_e64 s5, v1, v2
 // W32: v_cmp_nge_f32_e64 s5, v1, v2            ; encoding: [0x05,0x00,0x19,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
@@ -7077,12 +7237,12 @@ v_cmp_nge_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
 v_cmp_nge_f64_e64 null, 0xaf123456, -|vcc| clamp
 // GFX12: v_cmp_nge_f64_e64 null, 0xaf123456, -|vcc| clamp ; encoding: [0x7c,0x82,0x29,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
 
-v_cmp_ngt_f16_e64 s5, v1, v2
-// W32: v_cmp_ngt_f16_e64 s5, v1, v2            ; encoding: [0x05,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_ngt_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_ngt_f16_e64 s5, v1.l, v2.l        ; encoding: [0x05,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64 s5, v255, v255
-// W32: v_cmp_ngt_f16_e64 s5, v255, v255        ; encoding: [0x05,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_ngt_f16_e64 s5, v255.l, v255.l
+// W32: v_cmp_ngt_f16_e64 s5, v255.l, v255.l    ; encoding: [0x05,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
 v_cmp_ngt_f16_e64 s5, s1, s2
@@ -7133,13 +7293,13 @@ v_cmp_ngt_f16_e64 ttmp15, -src_scc, |vcc_lo|
 // W32: v_cmp_ngt_f16_e64 ttmp15, -src_scc, |vcc_lo| ; encoding: [0x7b,0x02,0x0b,0xd4,0xfd,0xd4,0x00,0x20]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_ngt_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cmp_ngt_f16_e64 s10, v1.l, v2.l
+// W32: v_cmp_ngt_f16_e64 s10, v1.l, v2.l       ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64 s[10:11], v255, v255
-// W64: v_cmp_ngt_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cmp_ngt_f16_e64 s10, v255.l, v255.l
+// W32: v_cmp_ngt_f16_e64 s10, v255.l, v255.l   ; encoding: [0x0a,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
 v_cmp_ngt_f16_e64 s[10:11], s1, s2
 // W64: v_cmp_ngt_f16_e64 s[10:11], s1, s2      ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x04,0x00,0x00]
@@ -7192,6 +7352,26 @@ v_cmp_ngt_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
 v_cmp_ngt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
 // GFX12: v_cmp_ngt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x0b,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+v_cmp_ngt_f16_e64 s5, v1.h, v2.l
+// W32: v_cmp_ngt_f16_e64 s5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0b,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s5, v255.l, v255.h
+// W32: v_cmp_ngt_f16_e64 s5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0b,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s[10:11], v1.h, v2.l
+// W64: v_cmp_ngt_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0b,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s[10:11], v255.l, v255.h
+// W64: v_cmp_ngt_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0b,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 vcc_lo, 0.5, -m0
+// W32: v_cmp_ngt_f16_e64 vcc_lo, 0.5, -m0      ; encoding: [0x6a,0x00,0x0b,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
 v_cmp_ngt_f32_e64 s5, v1, v2
 // W32: v_cmp_ngt_f32_e64 s5, v1, v2            ; encoding: [0x05,0x00,0x1b,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
@@ -7398,12 +7578,12 @@ v_cmp_ngt_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
 v_cmp_ngt_f64_e64 null, 0xaf123456, -|vcc| clamp
 // GFX12: v_cmp_ngt_f64_e64 null, 0xaf123456, -|vcc| clamp ; encoding: [0x7c,0x82,0x2b,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
 
-v_cmp_nle_f16_e64 s5, v1, v2
-// W32: v_cmp_nle_f16_e64 s5, v1, v2            ; encoding: [0x05,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_nle_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_nle_f16_e64 s5, v1.l, v2.l        ; encoding: [0x05,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64 s5, v255, v255
-// W32: v_cmp_nle_f16_e64 s5, v255, v255        ; encoding: [0x05,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_nle_f16_e64 s5, v255.l, v255.l
+// W32: v_cmp_nle_f16_e64 s5, v255.l, v255.l    ; encoding: [0x05,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
 v_cmp_nle_f16_e64 s5, s1, s2
@@ -7454,13 +7634,13 @@ v_cmp_nle_f16_e64 ttmp15, -src_scc, |vcc_lo|
 // W32: v_cmp_nle_f16_e64 ttmp15, -src_scc, |vcc_lo| ; encoding: [0x7b,0x02,0x0c,0xd4,0xfd,0xd4,0x00,0x20]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_nle_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cmp_nle_f16_e64 s10, v1.l, v2.l
+// W32: v_cmp_nle_f16_e64 s10, v1.l, v2.l       ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64 s[10:11], v255, v255
-// W64: v_cmp_nle_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cmp_nle_f16_e64 s10, v255.l, v255.l
+// W32: v_cmp_nle_f16_e64 s10, v255.l, v255.l   ; encoding: [0x0a,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
 v_cmp_nle_f16_e64 s[10:11], s1, s2
 // W64: v_cmp_nle_f16_e64 s[10:11], s1, s2      ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x04,0x00,0x00]
@@ -7513,6 +7693,26 @@ v_cmp_nle_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
 v_cmp_nle_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
 // GFX12: v_cmp_nle_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x0c,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+v_cmp_nle_f16_e64 s5, v1.h, v2.l
+// W32: v_cmp_nle_f16_e64 s5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0c,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s5, v255.l, v255.h
+// W32: v_cmp_nle_f16_e64 s5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0c,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s[10:11], v1.h, v2.l
+// W64: v_cmp_nle_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0c,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s[10:11], v255.l, v255.h
+// W64: v_cmp_nle_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0c,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 vcc_lo, 0.5, -m0
+// W32: v_cmp_nle_f16_e64 vcc_lo, 0.5, -m0      ; encoding: [0x6a,0x00,0x0c,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
 v_cmp_nle_f32_e64 s5, v1, v2
 // W32: v_cmp_nle_f32_e64 s5, v1, v2            ; encoding: [0x05,0x00,0x1c,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
@@ -7719,12 +7919,12 @@ v_cmp_nle_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
 v_cmp_nle_f64_e64 null, 0xaf123456, -|vcc| clamp
 // GFX12: v_cmp_nle_f64_e64 null, 0xaf123456, -|vcc| clamp ; encoding: [0x7c,0x82,0x2c,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
 
-v_cmp_nlg_f16_e64 s5, v1, v2
-// W32: v_cmp_nlg_f16_e64 s5, v1, v2            ; encoding: [0x05,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_nlg_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_nlg_f16_e64 s5, v1.l, v2.l        ; encoding: [0x05,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64 s5, v255, v255
-// W32: v_cmp_nlg_f16_e64 s5, v255, v255        ; encoding: [0x05,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_nlg_f16_e64 s5, v255.l, v255.l
+// W32: v_cmp_nlg_f16_e64 s5, v255.l, v255.l    ; encoding: [0x05,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
 v_cmp_nlg_f16_e64 s5, s1, s2
@@ -7775,13 +7975,13 @@ v_cmp_nlg_f16_e64 ttmp15, -src_scc, |vcc_lo|
 // W32: v_cmp_nlg_f16_e64 ttmp15, -src_scc, |vcc_lo| ; encoding: [0x7b,0x02,0x0a,0xd4,0xfd,0xd4,0x00,0x20]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_nlg_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cmp_nlg_f16_e64 s10, v1.l, v2.l
+// W32: v_cmp_nlg_f16_e64 s10, v1.l, v2.l       ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64 s[10:11], v255, v255
-// W64: v_cmp_nlg_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cmp_nlg_f16_e64 s10, v255.l, v255.l
+// W32: v_cmp_nlg_f16_e64 s10, v255.l, v255.l   ; encoding: [0x0a,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
 v_cmp_nlg_f16_e64 s[10:11], s1, s2
 // W64: v_cmp_nlg_f16_e64 s[10:11], s1, s2      ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x04,0x00,0x00]
@@ -7834,6 +8034,26 @@ v_cmp_nlg_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
 v_cmp_nlg_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
 // GFX12: v_cmp_nlg_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x0a,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+v_cmp_nlg_f16_e64 s5, v1.h, v2.l
+// W32: v_cmp_nlg_f16_e64 s5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0a,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s5, v255.l, v255.h
+// W32: v_cmp_nlg_f16_e64 s5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0a,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s[10:11], v1.h, v2.l
+// W64: v_cmp_nlg_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0a,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s[10:11], v255.l, v255.h
+// W64: v_cmp_nlg_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0a,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 vcc_lo, 0.5, -m0
+// W32: v_cmp_nlg_f16_e64 vcc_lo, 0.5, -m0      ; encoding: [0x6a,0x00,0x0a,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
 v_cmp_nlg_f32_e64 s5, v1, v2
 // W32: v_cmp_nlg_f32_e64 s5, v1, v2            ; encoding: [0x05,0x00,0x1a,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
@@ -8040,12 +8260,12 @@ v_cmp_nlg_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
 v_cmp_nlg_f64_e64 null, 0xaf123456, -|vcc| clamp
 // GFX12: v_cmp_nlg_f64_e64 null, 0xaf123456, -|vcc| clamp ; encoding: [0x7c,0x82,0x2a,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
 
-v_cmp_nlt_f16_e64 s5, v1, v2
-// W32: v_cmp_nlt_f16_e64 s5, v1, v2            ; encoding: [0x05,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_nlt_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_nlt_f16_e64 s5, v1.l, v2.l        ; encoding: [0x05,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64 s5, v255, v255
-// W32: v_cmp_nlt_f16_e64 s5, v255, v255        ; encoding: [0x05,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_nlt_f16_e64 s5, v255.l, v255.l
+// W32: v_cmp_nlt_f16_e64 s5, v255.l, v255.l    ; encoding: [0x05,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
 v_cmp_nlt_f16_e64 s5, s1, s2
@@ -8096,13 +8316,13 @@ v_cmp_nlt_f16_e64 ttmp15, -src_scc, |vcc_lo|
 // W32: v_cmp_nlt_f16_e64 ttmp15, -src_scc, |vcc_lo| ; encoding: [0x7b,0x02,0x0e,0xd4,0xfd,0xd4,0x00,0x20]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_nlt_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cmp_nlt_f16_e64 s10, v1.l, v2.l
+// W32: v_cmp_nlt_f16_e64 s10, v1.l, v2.l       ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64 s[10:11], v255, v255
-// W64: v_cmp_nlt_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cmp_nlt_f16_e64 s10, v255.l, v255.l
+// W32: v_cmp_nlt_f16_e64 s10, v255.l, v255.l   ; encoding: [0x0a,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
 
 v_cmp_nlt_f16_e64 s[10:11], s1, s2
 // W64: v_cmp_nlt_f16_e64 s[10:11], s1, s2      ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x04,0x00,0x00]
@@ -8155,6 +8375,26 @@ v_cmp_nlt_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
 v_cmp_nlt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
 // GFX12: v_cmp_nlt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x0e,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+v_cmp_nlt_f16_e64 s5, v1.h, v2.l
+// W32: v_cmp_nlt_f16_e64 s5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s5, v255.l, v255.h
+// W32: v_cmp_nlt_f16_e64 s5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s[10:11], v1.h, v2.l
+// W64: v_cmp_nlt_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0e,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s[10:11], v255.l, v255.h
+// W64: v_cmp_nlt_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0e,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 vcc_lo, 0.5, -m0
+// W32: v_cmp_nlt_f16_e64 vcc_lo, 0.5, -m0      ; encoding: [0x6a,0x00,0x0e,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+
 v_cmp_nlt_f32_e64 s5, v1, v2
 // W32: v_cmp_nlt_f32_e64 s5, v1, v2            ; encoding: [0x05,0x00,0x1e,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
@@ -8361,12 +8601,12 @@ v_cmp_nlt_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
 v_cmp_nlt_f64_e64 null, 0xaf123456, -|vcc| clamp
 // GFX12: v_cmp_nlt_f64_e64 null, 0xaf123456, -|vcc| clamp ; encoding: [0x7c,0x82,0x2e,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
 
-v_cmp_o_f16_e64 s5, v1, v2
-// W32: v_cmp_o_f16_e64 s5, v1, v2              ; encoding: [0x05,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_o_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_o_f16_e64 s5, v1.l, v2.l          ; encoding: [0x05,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_o_f16_e64 s5, v255, v255
-// W32: v_cmp_o_f16_e64 s5, v255, v255          ; encoding: [0x05,0x00,0x07,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_o_f16_e64 s5, v255.l, v255.l
+// W32: v_cmp_o_f16_e64 s5, v255.l, v255.l      ; encoding: [0x05,0x00,0x07,0xd4,0xff,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_o_f16_e64 s5, s1, s2
@@ -8417,13 +8657,13 @@ v_cmp_o_f16_e64 ttmp15, -src_scc, |vcc_lo|
 // W32: v_cmp_o_f16_e64 ttmp15, -src_scc, |vcc_lo| ; encoding: [0x7b,0x02,0x07,0xd4,0xfd,0xd4,0x00,0x20]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_o_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_o_f16_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]
-// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+v_cmp_o_f16_e64 s10, v1.l, v2.l
+// W32: v_cmp_o_f16_e64 s10, v1.l, v2.l         ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_o_f16_e64 s[10:11], v255, v255
-// W64: v_cmp_o_f16_e64 s[10:11], v255, v255    ; encoding: [0x0a,0x00,0x07,0xd4,0xff,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+v_cmp_o_f16_e64 s10, v255.l, v255.l
+// W32: v_cmp_o_f16_e64 s10, v255.l, v255.l     ; encoding: [0x0a,0x00,0x07,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_o_f16_e64 s[10:11], s1, s2
 // W64: v_cmp_o_f16_e64 s[10:11], s1, s2        ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x04,0x00,0x00]
@@ -8476,6 +8716,26 @@ v_cmp_o_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
 v_cmp_o_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
 // GFX12: v_cmp_o_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x07,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+v_cmp_o_f16_e64 s5, v1.h, v2.l
+// W32: v_cmp_o_f16_e64 s5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x07,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s5, v255.l, v255.h
+// W32: v_cmp_o_f16_e64 s5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x07,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s[10:11], v1.h, v2.l
+// W64: v_cmp_o_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x07,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s[10:11], v255.l, v255.h
+// W64: v_cmp_o_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x07,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 vcc_lo, 0.5, -m0
+// W32: v_cmp_o_f16_e64 vcc_lo, 0.5, -m0        ; encoding: [0x6a,0x00,0x07,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
 v_cmp_o_f32_e64 s5, v1, v2
 // W32: v_cmp_o_f32_e64 s5, v1, v2              ; encoding: [0x05,0x00,0x17,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
@@ -8682,12 +8942,12 @@ v_cmp_o_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
 v_cmp_o_f64_e64 null, 0xaf123456, -|vcc| clamp
 // GFX12: v_cmp_o_f64_e64 null, 0xaf123456, -|vcc| clamp ; encoding: [0x7c,0x82,0x27,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
 
-v_cmp_u_f16_e64 s5, v1, v2
-// W32: v_cmp_u_f16_e64 s5, v1, v2              ; encoding: [0x05,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]
+v_cmp_u_f16_e64 s5, v1.l, v2.l
+// W32: v_cmp_u_f16_e64 s5, v1.l, v2.l          ; encoding: [0x05,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_u_f16_e64 s5, v255, v255
-// W32: v_cmp_u_f16_e64 s5, v255, v255          ; encoding: [0x05,0x00,0x08,0xd4,0xff,0xff,0x03,0x00]
+v_cmp_u_f16_e64 s5, v255.l, v255.l
+// W32: v_cmp_u_f16_e64 s5, v255.l, v255.l      ; encoding: [0x05,0x00,0x08,0xd4,0xff,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_u_f16_e64 s5, s1, s2
@@ -8738,13 +8998,13 @@ v_cmp_u_f16_e64 ttmp15, -src_scc, |vcc_lo|
 // W32: v_cmp_u_f16_e64 ttmp15, -src_scc, |vcc_lo| ; encoding: [0x7b,0x02,0x08,0xd4,0xfd,0xd4,0x00,0x20]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_u_f16_e64 s[10:11], v1, v2
-// W64: v_cmp_u_f16_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]
-// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+v_cmp_u_f16_e64 s10, v1.l, v2.l
+// W32: v_cmp_u_f16_e64 s10, v1.l, v2.l         ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_u_f16_e64 s[10:11], v255, v255
-// W64: v_cmp_u_f16_e64 s[10:11], v255, v255    ; encoding: [0x0a,0x00,0x08,0xd4,0xff,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+v_cmp_u_f16_e64 s10, v255.l, v255.l
+// W32: v_cmp_u_f16_e64 s10, v255.l, v255.l     ; encoding: [0x0a,0x00,0x08,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_u_f16_e64 s[10:11], s1, s2
 // W64: v_cmp_u_f16_e64 s[10:11], s1, s2        ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x04,0x00,0x00]
@@ -8797,6 +9057,26 @@ v_cmp_u_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
 v_cmp_u_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
 // GFX12: v_cmp_u_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x08,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+v_cmp_u_f16_e64 s5, v1.h, v2.l
+// W32: v_cmp_u_f16_e64 s5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x08,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s5, v255.l, v255.h
+// W32: v_cmp_u_f16_e64 s5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x08,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s[10:11], v1.h, v2.l
+// W64: v_cmp_u_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x08,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s[10:11], v255.l, v255.h
+// W64: v_cmp_u_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x08,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 vcc_lo, 0.5, -m0
+// W32: v_cmp_u_f16_e64 vcc_lo, 0.5, -m0        ; encoding: [0x6a,0x00,0x08,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
 v_cmp_u_f32_e64 s5, v1, v2
 // W32: v_cmp_u_f32_e64 s5, v1, v2              ; encoding: [0x05,0x00,0x18,0xd4,0x01,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp16.s
index e697177314abc..aa911e235622a 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp16.s
@@ -4,128 +4,143 @@
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
 
-v_cmp_class_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_class_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
-// W32: v_cmp_class_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_class_f16_e64_dpp s5, v1.l, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp s5, v1.l, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
-// W32: v_cmp_class_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_class_f16_e64_dpp s5, v1.l, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp s5, v1.l, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_class_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_class_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_class_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_class_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_class_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_class_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_class_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_class_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_class_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_class_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_class_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_class_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_class_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_class_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_class_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_class_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_class_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_class_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_class_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_class_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_class_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_class_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_class_f16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+v_cmp_class_f16_e64_dpp vcc_hi, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_class_f16_e64_dpp vcc_hi, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_class_f16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+v_cmp_class_f16_e64_dpp ttmp15, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_class_f16_e64_dpp ttmp15, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
-// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_class_f16_e64_dpp s[10:11], v1.l, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1.l, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
-// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_class_f16_e64_dpp s[10:11], v1.l, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1.l, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_class_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_class_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_class_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_class_f16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+v_cmp_class_f16_e64_dpp vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_class_f16_e64_dpp vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_class_f16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+v_cmp_class_f16_e64_dpp ttmp[14:15], v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_class_f16_e64_dpp ttmp[14:15], v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp null, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX12: v_cmp_class_f16_e64_dpp null, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x05,0x30]
+v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x05,0x30]
+
+v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.h row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.h op_sel:[0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x11,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x05,0x30]
+
+v_cmp_class_f16_e64_dpp ttmp15, v1.h, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_class_f16_e64_dpp ttmp15, v1.h, v2.l op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x08,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp ttmp[14:15], v1.h, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_class_f16_e64_dpp ttmp[14:15], v1.h, v2.l op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x08,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_class_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
 v_cmp_class_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_class_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -250,128 +265,143 @@ v_cmp_class_f32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0
 v_cmp_class_f32_e64_dpp null, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX12: v_cmp_class_f32_e64_dpp null, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x01,0x7e,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x05,0x30]
 
-v_cmp_eq_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
-// W32: v_cmp_eq_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_eq_f16_e64_dpp s5, v1.l, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp s5, v1.l, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
-// W32: v_cmp_eq_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_eq_f16_e64_dpp s5, v1.l, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp s5, v1.l, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_eq_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_eq_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_eq_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_eq_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x02,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_eq_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_eq_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x02,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_eq_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_eq_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_eq_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
-// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_eq_f16_e64_dpp s[10:11], v1.l, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
-// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_eq_f16_e64_dpp s[10:11], v1.l, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_eq_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_eq_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_eq_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x02,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_eq_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_eq_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x02,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX12: v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x93,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_eq_f16_e64_dpp ttmp15, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_eq_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x0a,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x0a,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_eq_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x02,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
 v_cmp_eq_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_eq_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -1018,128 +1048,143 @@ v_cmp_eq_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3
 v_cmp_eq_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX12: v_cmp_eq_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x4a,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
 
-v_cmp_ge_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
-// W32: v_cmp_ge_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_ge_f16_e64_dpp s5, v1.l, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp s5, v1.l, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
-// W32: v_cmp_ge_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_ge_f16_e64_dpp s5, v1.l, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp s5, v1.l, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_ge_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_ge_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_ge_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_ge_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x06,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_ge_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_ge_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x06,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_ge_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_ge_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_ge_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s[10:11], v1.l, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
-// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_ge_f16_e64_dpp s[10:11], v1.l, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
-// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_ge_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_ge_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_ge_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_ge_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x06,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_ge_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x06,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x93,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_ge_f16_e64_dpp ttmp15, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_ge_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x0a,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x0a,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX12: v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmp_ge_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_ge_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x06,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
 v_cmp_ge_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_ge_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -1786,128 +1831,143 @@ v_cmp_ge_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3
 v_cmp_ge_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX12: v_cmp_ge_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x4e,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
 
-v_cmp_gt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
-// W32: v_cmp_gt_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_gt_f16_e64_dpp s5, v1.l, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp s5, v1.l, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
-// W32: v_cmp_gt_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_gt_f16_e64_dpp s5, v1.l, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp s5, v1.l, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_gt_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_gt_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_gt_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_gt_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x04,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_gt_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_gt_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x04,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_gt_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_gt_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_gt_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
-// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_gt_f16_e64_dpp s[10:11], v1.l, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
-// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_gt_f16_e64_dpp s[10:11], v1.l, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_gt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_gt_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_gt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x04,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_gt_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_gt_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x04,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX12: v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x93,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_gt_f16_e64_dpp ttmp15, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_gt_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x0a,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x0a,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_gt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x04,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
 v_cmp_gt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_gt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -2554,128 +2614,143 @@ v_cmp_gt_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3
 v_cmp_gt_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX12: v_cmp_gt_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x4c,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
 
-v_cmp_le_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_le_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
-// W32: v_cmp_le_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_le_f16_e64_dpp s5, v1.l, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp s5, v1.l, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
-// W32: v_cmp_le_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_le_f16_e64_dpp s5, v1.l, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp s5, v1.l, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_le_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_le_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_le_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_le_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_le_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_le_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_le_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_le_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_le_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_le_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_le_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_le_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_le_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_le_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_le_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_le_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_le_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_le_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_le_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_le_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_le_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_le_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x03,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_le_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_le_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x03,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_le_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_le_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_le_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
-// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_le_f16_e64_dpp s[10:11], v1.l, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1.l, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
-// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_le_f16_e64_dpp s[10:11], v1.l, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1.l, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_le_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_le_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_le_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x03,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_le_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_le_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x03,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_le_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x93,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_le_f16_e64_dpp ttmp15, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_le_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x0a,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x0a,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX12: v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmp_le_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_le_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x03,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
 v_cmp_le_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_le_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -3322,128 +3397,143 @@ v_cmp_le_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3
 v_cmp_le_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX12: v_cmp_le_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x4b,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
 
-v_cmp_lg_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
-// W32: v_cmp_lg_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_lg_f16_e64_dpp s5, v1.l, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp s5, v1.l, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
-// W32: v_cmp_lg_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_lg_f16_e64_dpp s5, v1.l, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp s5, v1.l, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_lg_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_lg_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_lg_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_lg_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x05,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_lg_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_lg_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x05,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_lg_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_lg_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_lg_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s[10:11], v1.l, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
-// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_lg_f16_e64_dpp s[10:11], v1.l, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
-// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_lg_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lg_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_lg_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_lg_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_lg_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x05,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_lg_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x05,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x93,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_lg_f16_e64_dpp ttmp15, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_lg_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x0a,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x0a,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX12: v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmp_lg_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_lg_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x05,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
 v_cmp_lg_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_lg_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -4873,128 +4963,143 @@ v_cmp_ne_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3
 v_cmp_ne_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX12: v_cmp_ne_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x4d,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
 
-v_cmp_neq_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
-// W32: v_cmp_neq_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_neq_f16_e64_dpp s5, v1.l, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp s5, v1.l, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
-// W32: v_cmp_neq_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_neq_f16_e64_dpp s5, v1.l, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp s5, v1.l, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_neq_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_neq_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_neq_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_neq_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_neq_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_neq_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_neq_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_neq_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_neq_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s[10:11], v1.l, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
-// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_neq_f16_e64_dpp s[10:11], v1.l, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
-// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_neq_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_neq_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_neq_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_neq_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_neq_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_neq_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x93,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_neq_f16_e64_dpp ttmp15, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_neq_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x0a,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x0a,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX12: v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmp_neq_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_neq_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
 v_cmp_neq_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_neq_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -5119,128 +5224,143 @@ v_cmp_neq_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:
 v_cmp_neq_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX12: v_cmp_neq_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x1d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
-v_cmp_nge_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
-// W32: v_cmp_nge_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nge_f16_e64_dpp s5, v1.l, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp s5, v1.l, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
-// W32: v_cmp_nge_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nge_f16_e64_dpp s5, v1.l, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp s5, v1.l, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_nge_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_nge_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_nge_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_nge_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x09,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_nge_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nge_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x09,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_nge_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_nge_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nge_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
-// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nge_f16_e64_dpp s[10:11], v1.l, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
-// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nge_f16_e64_dpp s[10:11], v1.l, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_nge_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_nge_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nge_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_nge_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x09,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_nge_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_nge_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x09,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX12: v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x93,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_nge_f16_e64_dpp ttmp15, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nge_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x0a,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x0a,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nge_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x09,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
 v_cmp_nge_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_nge_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -5365,128 +5485,143 @@ v_cmp_nge_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:
 v_cmp_nge_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX12: v_cmp_nge_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x19,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
-v_cmp_ngt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
-// W32: v_cmp_ngt_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_ngt_f16_e64_dpp s5, v1.l, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1.l, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
-// W32: v_cmp_ngt_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_ngt_f16_e64_dpp s5, v1.l, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1.l, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_ngt_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_ngt_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_ngt_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_ngt_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_ngt_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_ngt_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_ngt_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_ngt_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
-// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
-// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_ngt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_ngt_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ngt_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_ngt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_ngt_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_ngt_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX12: v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x93,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_ngt_f16_e64_dpp ttmp15, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_ngt_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x0a,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x0a,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
 v_cmp_ngt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_ngt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -5611,128 +5746,143 @@ v_cmp_ngt_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:
 v_cmp_ngt_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX12: v_cmp_ngt_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x1b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
-v_cmp_nle_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
-// W32: v_cmp_nle_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nle_f16_e64_dpp s5, v1.l, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp s5, v1.l, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
-// W32: v_cmp_nle_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nle_f16_e64_dpp s5, v1.l, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp s5, v1.l, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_nle_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_nle_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_nle_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_nle_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_nle_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nle_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_nle_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_nle_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nle_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s[10:11], v1.l, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
-// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nle_f16_e64_dpp s[10:11], v1.l, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
-// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_nle_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nle_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_nle_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_nle_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_nle_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_nle_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x93,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_nle_f16_e64_dpp ttmp15, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nle_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x0a,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x0a,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX12: v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmp_nle_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nle_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
 v_cmp_nle_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_nle_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -5857,128 +6007,143 @@ v_cmp_nle_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:
 v_cmp_nle_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX12: v_cmp_nle_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x1c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
-v_cmp_nlg_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
-// W32: v_cmp_nlg_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nlg_f16_e64_dpp s5, v1.l, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1.l, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
-// W32: v_cmp_nlg_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nlg_f16_e64_dpp s5, v1.l, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1.l, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_nlg_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_nlg_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_nlg_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_nlg_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nlg_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_nlg_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_nlg_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nlg_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
-// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
-// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_nlg_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_nlg_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlg_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_nlg_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_nlg_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_nlg_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX12: v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x93,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_nlg_f16_e64_dpp ttmp15, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nlg_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x0a,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x0a,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
 v_cmp_nlg_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_nlg_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -6103,128 +6268,143 @@ v_cmp_nlg_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:
 v_cmp_nlg_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX12: v_cmp_nlg_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x1a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
-v_cmp_nlt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
-// W32: v_cmp_nlt_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nlt_f16_e64_dpp s5, v1.l, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1.l, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
-// W32: v_cmp_nlt_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nlt_f16_e64_dpp s5, v1.l, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1.l, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_nlt_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_nlt_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_nlt_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_nlt_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nlt_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_nlt_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_nlt_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nlt_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
-// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
-// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_nlt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_nlt_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlt_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_nlt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_nlt_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_nlt_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x93,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_nlt_f16_e64_dpp ttmp15, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nlt_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x0a,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x0a,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX12: v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmp_nlt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
 v_cmp_nlt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_nlt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -6349,128 +6529,143 @@ v_cmp_nlt_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:
 v_cmp_nlt_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX12: v_cmp_nlt_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x1e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
-v_cmp_o_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_o_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
-// W32: v_cmp_o_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_o_f16_e64_dpp s5, v1.l, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp s5, v1.l, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
-// W32: v_cmp_o_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_o_f16_e64_dpp s5, v1.l, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp s5, v1.l, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_o_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_o_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_o_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_o_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_o_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_o_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_o_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_o_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_o_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_o_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_o_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_o_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_o_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_o_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_o_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_o_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_o_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_o_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_o_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_o_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_o_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_o_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_o_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x07,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_o_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_o_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x07,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_o_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_o_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_o_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s[10:11], v1.l, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1.l, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
-// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_o_f16_e64_dpp s[10:11], v1.l, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1.l, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
-// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_o_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_o_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_o_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_o_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_o_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x07,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_o_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x07,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_o_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x93,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_o_f16_e64_dpp ttmp15, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_o_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x0a,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x0a,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX12: v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmp_o_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_o_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x07,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
 v_cmp_o_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_o_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -6595,128 +6790,143 @@ v_cmp_o_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x
 v_cmp_o_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX12: v_cmp_o_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x17,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
-v_cmp_u_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_u_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
-// W32: v_cmp_u_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_u_f16_e64_dpp s5, v1.l, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp s5, v1.l, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
-// W32: v_cmp_u_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_u_f16_e64_dpp s5, v1.l, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp s5, v1.l, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_u_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_u_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s5, v1, v2 row_mirror
-// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_u_f16_e64_dpp s5, v1.l, v2.l row_mirror
+// W32: v_cmp_u_f16_e64_dpp s5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s5, v1, v2 row_half_mirror
-// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_u_f16_e64_dpp s5, v1.l, v2.l row_half_mirror
+// W32: v_cmp_u_f16_e64_dpp s5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s5, v1, v2 row_shl:1
-// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_u_f16_e64_dpp s5, v1.l, v2.l row_shl:1
+// W32: v_cmp_u_f16_e64_dpp s5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s5, v1, v2 row_shl:15
-// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_u_f16_e64_dpp s5, v1.l, v2.l row_shl:15
+// W32: v_cmp_u_f16_e64_dpp s5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s5, v1, v2 row_shr:1
-// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_u_f16_e64_dpp s5, v1.l, v2.l row_shr:1
+// W32: v_cmp_u_f16_e64_dpp s5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s5, v1, v2 row_shr:15
-// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_u_f16_e64_dpp s5, v1.l, v2.l row_shr:15
+// W32: v_cmp_u_f16_e64_dpp s5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s5, v1, v2 row_ror:1
-// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_u_f16_e64_dpp s5, v1.l, v2.l row_ror:1
+// W32: v_cmp_u_f16_e64_dpp s5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s105, v1, v2 row_ror:15
-// W32: v_cmp_u_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_u_f16_e64_dpp s105, v1.l, v2.l row_ror:15
+// W32: v_cmp_u_f16_e64_dpp s105, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_u_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_u_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_u_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_u_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x08,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_u_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_u_f16_e64_dpp vcc_hi, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x08,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_u_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_u_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_u_f16_e64_dpp ttmp15, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s[10:11], v1.l, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1.l, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
-// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_u_f16_e64_dpp s[10:11], v1.l, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1.l, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
-// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_mirror
-// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
-// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shl:1
-// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shl:15
-// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shr:1
-// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shr:15
-// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_ror:1
-// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_ror:15
-// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmp_u_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_u_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_u_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmp_u_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_u_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x08,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_u_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x08,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmp_u_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x93,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_u_f16_e64_dpp ttmp15, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_u_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x0a,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x0a,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX12: v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmp_u_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_u_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x08,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
 v_cmp_u_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_u_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp8.s
index 1f73f63e8918a..73be427cab863 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp8.s
@@ -4,60 +4,71 @@
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
 
-v_cmp_class_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_class_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_class_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x7d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+v_cmp_class_f16_e64_dpp s5, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp s5, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x7d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_class_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x7d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+v_cmp_class_f16_e64_dpp s5, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp s5, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x7d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_class_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_class_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_class_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_class_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_class_f16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_class_f16_e64_dpp vcc_hi, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp vcc_hi, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_class_f16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x7d,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_class_f16_e64_dpp ttmp15, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_class_f16_e64_dpp ttmp15, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x7d,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+v_cmp_class_f16_e64_dpp s[10:11], v1.l, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+v_cmp_class_f16_e64_dpp s[10:11], v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_class_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_class_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_class_f16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_class_f16_e64_dpp vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_class_f16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x7d,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_class_f16_e64_dpp ttmp14, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_class_f16_e64_dpp ttmp14, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x7d,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.l dpp8:[0,0,0,0,0,0,0,0]
+// GFX12: v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x01,0x7d,0xd4,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+
+v_cmp_class_f16_e64_dpp ttmp15, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_class_f16_e64_dpp ttmp15, v1.h, v2.l op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x08,0x7d,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp ttmp[14:15], v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_class_f16_e64_dpp ttmp[14:15], v1.h, v2.l op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x08,0x7d,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-v_cmp_class_f16_e64_dpp null, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX12: v_cmp_class_f16_e64_dpp null, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x01,0x7d,0xd4,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.h op_sel:[0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x11,0x7d,0xd4,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
 
 v_cmp_class_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_class_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -114,60 +125,75 @@ v_cmp_class_f32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmp_class_f32_e64_dpp null, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX12: v_cmp_class_f32_e64_dpp null, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x01,0x7e,0xd4,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
 
-v_cmp_eq_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_eq_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x02,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16_e64_dpp s5, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp s5, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x02,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_eq_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x02,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16_e64_dpp s5, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp s5, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x02,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_eq_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_eq_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x02,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x02,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_eq_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x02,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_eq_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x02,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16_e64_dpp s[10:11], v1.l, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16_e64_dpp s[10:11], v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_eq_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_eq_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x02,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x02,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x02,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_eq_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x02,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0]
+// GFX12: v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x02,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_eq_f16_e64_dpp ttmp15, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_eq_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x0a,0x02,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x02,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x0a,0x02,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX12: v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x02,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x93,0x02,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 v_cmp_eq_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_eq_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -466,60 +492,75 @@ v_cmp_eq_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmp_eq_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX12: v_cmp_eq_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x4a,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
 
-v_cmp_ge_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ge_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x06,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16_e64_dpp s5, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp s5, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x06,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ge_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x06,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16_e64_dpp s5, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp s5, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x06,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ge_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ge_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x06,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x06,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_ge_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x06,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ge_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x06,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16_e64_dpp s[10:11], v1.l, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16_e64_dpp s[10:11], v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ge_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ge_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x06,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x06,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x06,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ge_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x06,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0]
+// GFX12: v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x06,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_ge_f16_e64_dpp ttmp15, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ge_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x0a,0x06,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x06,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x0a,0x06,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX12: v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x06,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x93,0x06,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 v_cmp_ge_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_ge_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x16,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -818,60 +859,75 @@ v_cmp_ge_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmp_ge_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX12: v_cmp_ge_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x4e,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
 
-v_cmp_gt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_gt_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16_e64_dpp s5, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp s5, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_gt_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16_e64_dpp s5, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp s5, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_gt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_gt_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x04,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x04,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_gt_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x04,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_gt_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x04,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16_e64_dpp s[10:11], v1.l, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16_e64_dpp s[10:11], v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_gt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_gt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x04,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x04,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x04,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_gt_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x04,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0]
+// GFX12: v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x04,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_gt_f16_e64_dpp ttmp15, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_gt_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x0a,0x04,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x04,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x0a,0x04,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX12: v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x04,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x93,0x04,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 v_cmp_gt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_gt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x14,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -1170,60 +1226,75 @@ v_cmp_gt_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmp_gt_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX12: v_cmp_gt_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x4c,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
 
-v_cmp_le_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_le_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_le_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+v_cmp_le_f16_e64_dpp s5, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp s5, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_le_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+v_cmp_le_f16_e64_dpp s5, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp s5, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_le_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_le_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_le_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_le_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_le_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x03,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_le_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x03,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_le_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x03,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_le_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_le_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x03,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+v_cmp_le_f16_e64_dpp s[10:11], v1.l, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+v_cmp_le_f16_e64_dpp s[10:11], v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_le_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_le_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_le_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x03,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_le_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x03,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x03,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_le_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_le_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x03,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0]
+// GFX12: v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x03,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_le_f16_e64_dpp ttmp15, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_le_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x0a,0x03,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x03,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x0a,0x03,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX12: v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x03,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x93,0x03,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 v_cmp_le_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_le_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -1522,60 +1593,75 @@ v_cmp_le_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmp_le_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX12: v_cmp_le_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x4b,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
 
-v_cmp_lg_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_lg_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x05,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16_e64_dpp s5, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp s5, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x05,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_lg_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x05,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16_e64_dpp s5, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp s5, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x05,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_lg_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_lg_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x05,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x05,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_lg_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x05,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_lg_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x05,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16_e64_dpp s[10:11], v1.l, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16_e64_dpp s[10:11], v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_lg_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_lg_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x05,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x05,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x05,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_lg_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x05,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0]
+// GFX12: v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x05,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_lg_f16_e64_dpp ttmp15, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_lg_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x0a,0x05,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x05,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x0a,0x05,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
 
-v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX12: v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x05,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x93,0x05,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 v_cmp_lg_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_lg_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x15,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -2241,60 +2327,75 @@ v_cmp_ne_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmp_ne_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX12: v_cmp_ne_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x4d,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
 
-v_cmp_neq_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_neq_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16_e64_dpp s5, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp s5, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_neq_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16_e64_dpp s5, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp s5, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_neq_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_neq_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_neq_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0d,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_neq_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0d,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16_e64_dpp s[10:11], v1.l, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16_e64_dpp s[10:11], v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_neq_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_neq_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0d,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_neq_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0d,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0]
+// GFX12: v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0d,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_neq_f16_e64_dpp ttmp15, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_neq_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x0a,0x0d,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x0a,0x0d,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX12: v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0d,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x93,0x0d,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 v_cmp_neq_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_neq_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -2351,60 +2452,75 @@ v_cmp_neq_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmp_neq_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX12: v_cmp_neq_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x1d,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
-v_cmp_nge_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nge_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x09,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16_e64_dpp s5, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp s5, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x09,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nge_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x09,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16_e64_dpp s5, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp s5, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x09,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nge_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nge_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x09,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x09,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_nge_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x09,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nge_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x09,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16_e64_dpp s[10:11], v1.l, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16_e64_dpp s[10:11], v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nge_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nge_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x09,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x09,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x09,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nge_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x09,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0]
+// GFX12: v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x09,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_nge_f16_e64_dpp ttmp15, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nge_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x0a,0x09,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x09,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x0a,0x09,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX12: v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x09,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x93,0x09,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 v_cmp_nge_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_nge_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x19,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -2461,60 +2577,75 @@ v_cmp_nge_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmp_nge_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX12: v_cmp_nge_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x19,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
-v_cmp_ngt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ngt_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0b,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16_e64_dpp s5, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0b,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ngt_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0b,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16_e64_dpp s5, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0b,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ngt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ngt_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_ngt_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0b,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ngt_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0b,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ngt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ngt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0b,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ngt_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0b,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0]
+// GFX12: v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0b,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_ngt_f16_e64_dpp ttmp15, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ngt_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x0a,0x0b,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x0a,0x0b,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX12: v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0b,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x93,0x0b,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 v_cmp_ngt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_ngt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -2571,60 +2702,75 @@ v_cmp_ngt_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmp_ngt_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX12: v_cmp_ngt_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x1b,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
-v_cmp_nle_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nle_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16_e64_dpp s5, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp s5, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nle_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16_e64_dpp s5, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp s5, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nle_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nle_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_nle_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0c,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nle_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0c,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16_e64_dpp s[10:11], v1.l, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16_e64_dpp s[10:11], v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nle_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nle_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0c,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nle_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0c,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0]
+// GFX12: v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0c,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_nle_f16_e64_dpp ttmp15, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nle_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x0a,0x0c,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x0a,0x0c,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX12: v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0c,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x93,0x0c,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 v_cmp_nle_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_nle_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -2681,60 +2827,75 @@ v_cmp_nle_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmp_nle_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX12: v_cmp_nle_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x1c,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
-v_cmp_nlg_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlg_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16_e64_dpp s5, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlg_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16_e64_dpp s5, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlg_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlg_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_nlg_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0a,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nlg_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0a,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlg_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlg_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0a,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nlg_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0a,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0]
+// GFX12: v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0a,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_nlg_f16_e64_dpp ttmp15, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nlg_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x0a,0x0a,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x0a,0x0a,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX12: v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0a,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x93,0x0a,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 v_cmp_nlg_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_nlg_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -2791,60 +2952,75 @@ v_cmp_nlg_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmp_nlg_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX12: v_cmp_nlg_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x1a,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
-v_cmp_nlt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlt_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16_e64_dpp s5, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlt_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16_e64_dpp s5, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlt_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_nlt_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0e,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nlt_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0e,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0e,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nlt_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0e,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0]
+// GFX12: v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0e,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_nlt_f16_e64_dpp ttmp15, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nlt_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x0a,0x0e,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x0a,0x0e,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX12: v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0e,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x93,0x0e,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 v_cmp_nlt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_nlt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -2901,60 +3077,75 @@ v_cmp_nlt_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmp_nlt_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX12: v_cmp_nlt_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x1e,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
-v_cmp_o_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_o_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_o_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x07,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+v_cmp_o_f16_e64_dpp s5, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp s5, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x07,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_o_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x07,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+v_cmp_o_f16_e64_dpp s5, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp s5, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x07,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_o_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_o_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_o_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_o_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_o_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x07,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_o_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x07,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_o_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x07,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_o_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_o_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x07,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+v_cmp_o_f16_e64_dpp s[10:11], v1.l, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+v_cmp_o_f16_e64_dpp s[10:11], v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_o_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_o_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_o_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x07,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_o_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x07,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x07,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_o_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_o_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x07,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0]
+// GFX12: v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x07,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_o_f16_e64_dpp ttmp15, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_o_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x0a,0x07,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x07,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x0a,0x07,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX12: v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x07,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x93,0x07,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 v_cmp_o_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_o_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -3011,60 +3202,75 @@ v_cmp_o_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmp_o_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX12: v_cmp_o_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x17,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
-v_cmp_u_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_u_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp s5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_u_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x08,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+v_cmp_u_f16_e64_dpp s5, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp s5, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x08,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_u_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x08,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+v_cmp_u_f16_e64_dpp s5, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp s5, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x08,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_u_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_u_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp s105, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_u_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_u_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_u_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x08,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_u_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp vcc_hi, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x08,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_u_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x08,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_u_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_u_f16_e64_dpp ttmp15, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x08,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+v_cmp_u_f16_e64_dpp s[10:11], v1.l, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+v_cmp_u_f16_e64_dpp s[10:11], v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_u_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmp_u_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_u_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x08,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmp_u_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x08,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x08,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmp_u_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_u_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x08,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0]
+// GFX12: v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x08,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_u_f16_e64_dpp ttmp15, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_u_f16_e64_dpp ttmp15, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x0a,0x08,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x08,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x0a,0x08,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
-v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX12: v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x08,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x93,0x08,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 v_cmp_u_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_u_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x18,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopc.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopc.s
index 324c803e37973..9136b2d352e80 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vopc.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopc.s
@@ -4,124 +4,156 @@
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
 
-v_cmp_class_f16_e32 vcc_lo, v1, v2
-// W32: v_cmp_class_f16_e32 vcc_lo, v1, v2      ; encoding: [0x01,0x05,0xfa,0x7c]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, v1.l, v2.l  ; encoding: [0x01,0x05,0xfa,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v127, v2
-// W32: v_cmp_class_f16_e32 vcc_lo, v127, v2    ; encoding: [0x7f,0x05,0xfa,0x7c]
+v_cmp_class_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, v127.l, v2.l ; encoding: [0x7f,0x05,0xfa,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, s1, v2
-// W32: v_cmp_class_f16_e32 vcc_lo, s1, v2      ; encoding: [0x01,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, s1, v2.l    ; encoding: [0x01,0x04,0xfa,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, s105, v2
-// W32: v_cmp_class_f16_e32 vcc_lo, s105, v2    ; encoding: [0x69,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, s105, v2.l  ; encoding: [0x69,0x04,0xfa,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_class_f16_e32 vcc_lo, vcc_lo, v2  ; encoding: [0x6a,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, vcc_lo, v2.l ; encoding: [0x6a,0x04,0xfa,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_class_f16_e32 vcc_lo, vcc_hi, v2  ; encoding: [0x6b,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, vcc_hi, v2.l ; encoding: [0x6b,0x04,0xfa,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_class_f16_e32 vcc_lo, ttmp15, v2  ; encoding: [0x7b,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, ttmp15, v2.l ; encoding: [0x7b,0x04,0xfa,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, m0, v2
-// W32: v_cmp_class_f16_e32 vcc_lo, m0, v2      ; encoding: [0x7d,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, m0, v2.l    ; encoding: [0x7d,0x04,0xfa,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_class_f16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, exec_lo, v2.l ; encoding: [0x7e,0x04,0xfa,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_class_f16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, exec_hi, v2.l ; encoding: [0x7f,0x04,0xfa,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, null, v2
-// W32: v_cmp_class_f16_e32 vcc_lo, null, v2    ; encoding: [0x7c,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc_lo, null, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, null, v2.l  ; encoding: [0x7c,0x04,0xfa,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, -1, v2
-// W32: v_cmp_class_f16_e32 vcc_lo, -1, v2      ; encoding: [0xc1,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, -1, v2.l    ; encoding: [0xc1,0x04,0xfa,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_class_f16_e32 vcc_lo, 0.5, v2     ; encoding: [0xf0,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, 0.5, v2.l   ; encoding: [0xf0,0x04,0xfa,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_class_f16_e32 vcc_lo, src_scc, v2 ; encoding: [0xfd,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, src_scc, v2.l ; encoding: [0xfd,0x04,0xfa,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_class_f16_e32 vcc_lo, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_class_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_class_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2
-// W64: v_cmp_class_f16_e32 vcc, v1, v2         ; encoding: [0x01,0x05,0xfa,0x7c]
+v_cmp_class_f16 vcc, v1.l, v2.l
+// W64: v_cmp_class_f16_e32 vcc, v1.l, v2.l     ; encoding: [0x01,0x05,0xfa,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v127, v2
-// W64: v_cmp_class_f16_e32 vcc, v127, v2       ; encoding: [0x7f,0x05,0xfa,0x7c]
+v_cmp_class_f16 vcc, v127.l, v2.l
+// W64: v_cmp_class_f16_e32 vcc, v127.l, v2.l   ; encoding: [0x7f,0x05,0xfa,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, s1, v2
-// W64: v_cmp_class_f16_e32 vcc, s1, v2         ; encoding: [0x01,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc, s1, v2.l
+// W64: v_cmp_class_f16_e32 vcc, s1, v2.l       ; encoding: [0x01,0x04,0xfa,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, s105, v2
-// W64: v_cmp_class_f16_e32 vcc, s105, v2       ; encoding: [0x69,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc, s105, v2.l
+// W64: v_cmp_class_f16_e32 vcc, s105, v2.l     ; encoding: [0x69,0x04,0xfa,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, vcc_lo, v2
-// W64: v_cmp_class_f16_e32 vcc, vcc_lo, v2     ; encoding: [0x6a,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_class_f16_e32 vcc, vcc_lo, v2.l   ; encoding: [0x6a,0x04,0xfa,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, vcc_hi, v2
-// W64: v_cmp_class_f16_e32 vcc, vcc_hi, v2     ; encoding: [0x6b,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_class_f16_e32 vcc, vcc_hi, v2.l   ; encoding: [0x6b,0x04,0xfa,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, ttmp15, v2
-// W64: v_cmp_class_f16_e32 vcc, ttmp15, v2     ; encoding: [0x7b,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_class_f16_e32 vcc, ttmp15, v2.l   ; encoding: [0x7b,0x04,0xfa,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, m0, v2
-// W64: v_cmp_class_f16_e32 vcc, m0, v2         ; encoding: [0x7d,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc, m0, v2.l
+// W64: v_cmp_class_f16_e32 vcc, m0, v2.l       ; encoding: [0x7d,0x04,0xfa,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, exec_lo, v2
-// W64: v_cmp_class_f16_e32 vcc, exec_lo, v2    ; encoding: [0x7e,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_class_f16_e32 vcc, exec_lo, v2.l  ; encoding: [0x7e,0x04,0xfa,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, exec_hi, v2
-// W64: v_cmp_class_f16_e32 vcc, exec_hi, v2    ; encoding: [0x7f,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_class_f16_e32 vcc, exec_hi, v2.l  ; encoding: [0x7f,0x04,0xfa,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, null, v2
-// W64: v_cmp_class_f16_e32 vcc, null, v2       ; encoding: [0x7c,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc, null, v2.l
+// W64: v_cmp_class_f16_e32 vcc, null, v2.l     ; encoding: [0x7c,0x04,0xfa,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, -1, v2
-// W64: v_cmp_class_f16_e32 vcc, -1, v2         ; encoding: [0xc1,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc, -1, v2.l
+// W64: v_cmp_class_f16_e32 vcc, -1, v2.l       ; encoding: [0xc1,0x04,0xfa,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, 0.5, v2
-// W64: v_cmp_class_f16_e32 vcc, 0.5, v2        ; encoding: [0xf0,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc, 0.5, v2.l
+// W64: v_cmp_class_f16_e32 vcc, 0.5, v2.l      ; encoding: [0xf0,0x04,0xfa,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, src_scc, v2
-// W64: v_cmp_class_f16_e32 vcc, src_scc, v2    ; encoding: [0xfd,0x04,0xfa,0x7c]
+v_cmp_class_f16 vcc, src_scc, v2.l
+// W64: v_cmp_class_f16_e32 vcc, src_scc, v2.l  ; encoding: [0xfd,0x04,0xfa,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_class_f16_e32 vcc, 0xfe0b, v127   ; encoding: [0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_class_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_class_f16_e32 vcc, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1.h, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, v1.h, v2.l  ; encoding: [0x81,0x05,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1.h, v2.l
+// W64: v_cmp_class_f16_e32 vcc, v1.h, v2.l     ; encoding: [0x81,0x05,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v127.h, v2.l
+// W32: v_cmp_class_f16_e32 vcc_lo, v127.h, v2.l ; encoding: [0xff,0x05,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v127.h, v2.l
+// W64: v_cmp_class_f16_e32 vcc, v127.h, v2.l   ; encoding: [0xff,0x05,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, src_scc, v2.h
+// W32: v_cmp_class_f16_e32 vcc_lo, src_scc, v2.h ; encoding: [0xfd,0x04,0xfb,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, src_scc, v2.h
+// W64: v_cmp_class_f16_e32 vcc, src_scc, v2.h  ; encoding: [0xfd,0x04,0xfb,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, 0xfe0b, v127.h
+// W32: v_cmp_class_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xfb,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, 0xfe0b, v127.h
+// W64: v_cmp_class_f16_e32 vcc, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xfb,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_class_f32 vcc_lo, v1, v2
@@ -340,124 +372,156 @@ v_cmp_class_f64 vcc, 0xaf123456, v255
 // W64: v_cmp_class_f64_e32 vcc, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x7c,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2
-// W32: v_cmp_eq_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x04,0x7c]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, v1.l, v2.l     ; encoding: [0x01,0x05,0x04,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v127, v2
-// W32: v_cmp_eq_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x04,0x7c]
+v_cmp_eq_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, v127.l, v2.l   ; encoding: [0x7f,0x05,0x04,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, s1, v2
-// W32: v_cmp_eq_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, s1, v2.l       ; encoding: [0x01,0x04,0x04,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, s105, v2
-// W32: v_cmp_eq_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, s105, v2.l     ; encoding: [0x69,0x04,0x04,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_eq_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, vcc_lo, v2.l   ; encoding: [0x6a,0x04,0x04,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_eq_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, vcc_hi, v2.l   ; encoding: [0x6b,0x04,0x04,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_eq_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, ttmp15, v2.l   ; encoding: [0x7b,0x04,0x04,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, m0, v2
-// W32: v_cmp_eq_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, m0, v2.l       ; encoding: [0x7d,0x04,0x04,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_eq_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, exec_lo, v2.l  ; encoding: [0x7e,0x04,0x04,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_eq_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, exec_hi, v2.l  ; encoding: [0x7f,0x04,0x04,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, null, v2
-// W32: v_cmp_eq_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc_lo, null, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, null, v2.l     ; encoding: [0x7c,0x04,0x04,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, -1, v2
-// W32: v_cmp_eq_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, -1, v2.l       ; encoding: [0xc1,0x04,0x04,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_eq_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, 0.5, v2.l      ; encoding: [0xf0,0x04,0x04,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_eq_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, src_scc, v2.l  ; encoding: [0xfd,0x04,0x04,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_eq_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_eq_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2
-// W64: v_cmp_eq_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x04,0x7c]
+v_cmp_eq_f16 vcc, v1.l, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, v1.l, v2.l        ; encoding: [0x01,0x05,0x04,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v127, v2
-// W64: v_cmp_eq_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x04,0x7c]
+v_cmp_eq_f16 vcc, v127.l, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, v127.l, v2.l      ; encoding: [0x7f,0x05,0x04,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, s1, v2
-// W64: v_cmp_eq_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc, s1, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, s1, v2.l          ; encoding: [0x01,0x04,0x04,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, s105, v2
-// W64: v_cmp_eq_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc, s105, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, s105, v2.l        ; encoding: [0x69,0x04,0x04,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, vcc_lo, v2
-// W64: v_cmp_eq_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, vcc_lo, v2.l      ; encoding: [0x6a,0x04,0x04,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, vcc_hi, v2
-// W64: v_cmp_eq_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, vcc_hi, v2.l      ; encoding: [0x6b,0x04,0x04,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, ttmp15, v2
-// W64: v_cmp_eq_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, ttmp15, v2.l      ; encoding: [0x7b,0x04,0x04,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, m0, v2
-// W64: v_cmp_eq_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc, m0, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, m0, v2.l          ; encoding: [0x7d,0x04,0x04,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, exec_lo, v2
-// W64: v_cmp_eq_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, exec_lo, v2.l     ; encoding: [0x7e,0x04,0x04,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, exec_hi, v2
-// W64: v_cmp_eq_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, exec_hi, v2.l     ; encoding: [0x7f,0x04,0x04,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, null, v2
-// W64: v_cmp_eq_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc, null, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, null, v2.l        ; encoding: [0x7c,0x04,0x04,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, -1, v2
-// W64: v_cmp_eq_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc, -1, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, -1, v2.l          ; encoding: [0xc1,0x04,0x04,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, 0.5, v2
-// W64: v_cmp_eq_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc, 0.5, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, 0.5, v2.l         ; encoding: [0xf0,0x04,0x04,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, src_scc, v2
-// W64: v_cmp_eq_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x04,0x7c]
+v_cmp_eq_f16 vcc, src_scc, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, src_scc, v2.l     ; encoding: [0xfd,0x04,0x04,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_eq_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_eq_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_eq_f16_e32 vcc, 0xfe0b, v127.l    ; encoding: [0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1.h, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, v1.h, v2.l     ; encoding: [0x81,0x05,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.h, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, v1.h, v2.l        ; encoding: [0x81,0x05,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v127.h, v2.l
+// W32: v_cmp_eq_f16_e32 vcc_lo, v127.h, v2.l   ; encoding: [0xff,0x05,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v127.h, v2.l
+// W64: v_cmp_eq_f16_e32 vcc, v127.h, v2.l      ; encoding: [0xff,0x05,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, src_scc, v2.h
+// W32: v_cmp_eq_f16_e32 vcc_lo, src_scc, v2.h  ; encoding: [0xfd,0x04,0x05,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, src_scc, v2.h
+// W64: v_cmp_eq_f16_e32 vcc, src_scc, v2.h     ; encoding: [0xfd,0x04,0x05,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, 0xfe0b, v127.h
+// W32: v_cmp_eq_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x05,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, 0xfe0b, v127.h
+// W64: v_cmp_eq_f16_e32 vcc, 0xfe0b, v127.h    ; encoding: [0xff,0xfe,0x05,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_f32 vcc_lo, v1, v2
@@ -1428,124 +1492,156 @@ v_cmp_eq_u64 vcc, 0xaf123456, v[254:255]
 // W64: v_cmp_eq_u64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xb5,0x7c,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2
-// W32: v_cmp_ge_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x0c,0x7c]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, v1.l, v2.l     ; encoding: [0x01,0x05,0x0c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v127, v2
-// W32: v_cmp_ge_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x0c,0x7c]
+v_cmp_ge_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, v127.l, v2.l   ; encoding: [0x7f,0x05,0x0c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, s1, v2
-// W32: v_cmp_ge_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, s1, v2.l       ; encoding: [0x01,0x04,0x0c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, s105, v2
-// W32: v_cmp_ge_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, s105, v2.l     ; encoding: [0x69,0x04,0x0c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_ge_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, vcc_lo, v2.l   ; encoding: [0x6a,0x04,0x0c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_ge_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, vcc_hi, v2.l   ; encoding: [0x6b,0x04,0x0c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_ge_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, ttmp15, v2.l   ; encoding: [0x7b,0x04,0x0c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, m0, v2
-// W32: v_cmp_ge_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, m0, v2.l       ; encoding: [0x7d,0x04,0x0c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_ge_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, exec_lo, v2.l  ; encoding: [0x7e,0x04,0x0c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_ge_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, exec_hi, v2.l  ; encoding: [0x7f,0x04,0x0c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, null, v2
-// W32: v_cmp_ge_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc_lo, null, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, null, v2.l     ; encoding: [0x7c,0x04,0x0c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, -1, v2
-// W32: v_cmp_ge_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, -1, v2.l       ; encoding: [0xc1,0x04,0x0c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_ge_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, 0.5, v2.l      ; encoding: [0xf0,0x04,0x0c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_ge_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, src_scc, v2.l  ; encoding: [0xfd,0x04,0x0c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_ge_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_ge_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2
-// W64: v_cmp_ge_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x0c,0x7c]
+v_cmp_ge_f16 vcc, v1.l, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, v1.l, v2.l        ; encoding: [0x01,0x05,0x0c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v127, v2
-// W64: v_cmp_ge_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x0c,0x7c]
+v_cmp_ge_f16 vcc, v127.l, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, v127.l, v2.l      ; encoding: [0x7f,0x05,0x0c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, s1, v2
-// W64: v_cmp_ge_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc, s1, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, s1, v2.l          ; encoding: [0x01,0x04,0x0c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, s105, v2
-// W64: v_cmp_ge_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc, s105, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, s105, v2.l        ; encoding: [0x69,0x04,0x0c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, vcc_lo, v2
-// W64: v_cmp_ge_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, vcc_lo, v2.l      ; encoding: [0x6a,0x04,0x0c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, vcc_hi, v2
-// W64: v_cmp_ge_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, vcc_hi, v2.l      ; encoding: [0x6b,0x04,0x0c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, ttmp15, v2
-// W64: v_cmp_ge_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, ttmp15, v2.l      ; encoding: [0x7b,0x04,0x0c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, m0, v2
-// W64: v_cmp_ge_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc, m0, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, m0, v2.l          ; encoding: [0x7d,0x04,0x0c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, exec_lo, v2
-// W64: v_cmp_ge_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, exec_lo, v2.l     ; encoding: [0x7e,0x04,0x0c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, exec_hi, v2
-// W64: v_cmp_ge_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, exec_hi, v2.l     ; encoding: [0x7f,0x04,0x0c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, null, v2
-// W64: v_cmp_ge_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc, null, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, null, v2.l        ; encoding: [0x7c,0x04,0x0c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, -1, v2
-// W64: v_cmp_ge_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc, -1, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, -1, v2.l          ; encoding: [0xc1,0x04,0x0c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, 0.5, v2
-// W64: v_cmp_ge_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc, 0.5, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, 0.5, v2.l         ; encoding: [0xf0,0x04,0x0c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, src_scc, v2
-// W64: v_cmp_ge_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x0c,0x7c]
+v_cmp_ge_f16 vcc, src_scc, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, src_scc, v2.l     ; encoding: [0xfd,0x04,0x0c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_ge_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_ge_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_ge_f16_e32 vcc, 0xfe0b, v127.l    ; encoding: [0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.h, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, v1.h, v2.l        ; encoding: [0x81,0x05,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1.h, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, v1.h, v2.l     ; encoding: [0x81,0x05,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v127.h, v2.l
+// W64: v_cmp_ge_f16_e32 vcc, v127.h, v2.l      ; encoding: [0xff,0x05,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v127.h, v2.l
+// W32: v_cmp_ge_f16_e32 vcc_lo, v127.h, v2.l   ; encoding: [0xff,0x05,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, src_scc, v2.h
+// W32: v_cmp_ge_f16_e32 vcc_lo, src_scc, v2.h  ; encoding: [0xfd,0x04,0x0d,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, src_scc, v2.h
+// W64: v_cmp_ge_f16_e32 vcc, src_scc, v2.h     ; encoding: [0xfd,0x04,0x0d,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, 0xfe0b, v127.h
+// W32: v_cmp_ge_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x0d,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, 0xfe0b, v127.h
+// W64: v_cmp_ge_f16_e32 vcc, 0xfe0b, v127.h    ; encoding: [0xff,0xfe,0x0d,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_f32 vcc_lo, v1, v2
@@ -2516,124 +2612,156 @@ v_cmp_ge_u64 vcc, 0xaf123456, v[254:255]
 // W64: v_cmp_ge_u64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xbd,0x7c,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2
-// W32: v_cmp_gt_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, v1.l, v2.l     ; encoding: [0x01,0x05,0x08,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v127, v2
-// W32: v_cmp_gt_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, v127.l, v2.l   ; encoding: [0x7f,0x05,0x08,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, s1, v2
-// W32: v_cmp_gt_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, s1, v2.l       ; encoding: [0x01,0x04,0x08,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, s105, v2
-// W32: v_cmp_gt_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, s105, v2.l     ; encoding: [0x69,0x04,0x08,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_gt_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, vcc_lo, v2.l   ; encoding: [0x6a,0x04,0x08,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_gt_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, vcc_hi, v2.l   ; encoding: [0x6b,0x04,0x08,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_gt_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, ttmp15, v2.l   ; encoding: [0x7b,0x04,0x08,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, m0, v2
-// W32: v_cmp_gt_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, m0, v2.l       ; encoding: [0x7d,0x04,0x08,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_gt_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, exec_lo, v2.l  ; encoding: [0x7e,0x04,0x08,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_gt_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, exec_hi, v2.l  ; encoding: [0x7f,0x04,0x08,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, null, v2
-// W32: v_cmp_gt_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, null, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, null, v2.l     ; encoding: [0x7c,0x04,0x08,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, -1, v2
-// W32: v_cmp_gt_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, -1, v2.l       ; encoding: [0xc1,0x04,0x08,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_gt_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, 0.5, v2.l      ; encoding: [0xf0,0x04,0x08,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_gt_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, src_scc, v2.l  ; encoding: [0xfd,0x04,0x08,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_gt_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_gt_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2
-// W64: v_cmp_gt_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x08,0x7c]
+v_cmp_gt_f16 vcc, v1.l, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, v1.l, v2.l        ; encoding: [0x01,0x05,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v127.l, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, v127.l, v2.l      ; encoding: [0x7f,0x05,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, s1, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, s1, v2.l          ; encoding: [0x01,0x04,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, s105, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, s105, v2.l        ; encoding: [0x69,0x04,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, vcc_lo, v2.l      ; encoding: [0x6a,0x04,0x08,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v127, v2
-// W64: v_cmp_gt_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x08,0x7c]
+v_cmp_gt_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, vcc_hi, v2.l      ; encoding: [0x6b,0x04,0x08,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, s1, v2
-// W64: v_cmp_gt_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, ttmp15, v2.l      ; encoding: [0x7b,0x04,0x08,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, s105, v2
-// W64: v_cmp_gt_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc, m0, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, m0, v2.l          ; encoding: [0x7d,0x04,0x08,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, vcc_lo, v2
-// W64: v_cmp_gt_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, exec_lo, v2.l     ; encoding: [0x7e,0x04,0x08,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, vcc_hi, v2
-// W64: v_cmp_gt_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, exec_hi, v2.l     ; encoding: [0x7f,0x04,0x08,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, ttmp15, v2
-// W64: v_cmp_gt_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc, null, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, null, v2.l        ; encoding: [0x7c,0x04,0x08,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, m0, v2
-// W64: v_cmp_gt_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc, -1, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, -1, v2.l          ; encoding: [0xc1,0x04,0x08,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, exec_lo, v2
-// W64: v_cmp_gt_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc, 0.5, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, 0.5, v2.l         ; encoding: [0xf0,0x04,0x08,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, exec_hi, v2
-// W64: v_cmp_gt_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc, src_scc, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, src_scc, v2.l     ; encoding: [0xfd,0x04,0x08,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, null, v2
-// W64: v_cmp_gt_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_gt_f16_e32 vcc, 0xfe0b, v127.l    ; encoding: [0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, -1, v2
-// W64: v_cmp_gt_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, v1.h, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, v1.h, v2.l     ; encoding: [0x81,0x05,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1.h, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, v1.h, v2.l        ; encoding: [0x81,0x05,0x08,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, 0.5, v2
-// W64: v_cmp_gt_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, v127.h, v2.l
+// W32: v_cmp_gt_f16_e32 vcc_lo, v127.h, v2.l   ; encoding: [0xff,0x05,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v127.h, v2.l
+// W64: v_cmp_gt_f16_e32 vcc, v127.h, v2.l      ; encoding: [0xff,0x05,0x08,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, src_scc, v2
-// W64: v_cmp_gt_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x08,0x7c]
+v_cmp_gt_f16 vcc_lo, src_scc, v2.h
+// W32: v_cmp_gt_f16_e32 vcc_lo, src_scc, v2.h  ; encoding: [0xfd,0x04,0x09,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, src_scc, v2.h
+// W64: v_cmp_gt_f16_e32 vcc, src_scc, v2.h     ; encoding: [0xfd,0x04,0x09,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_gt_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_gt_f16 vcc_lo, 0xfe0b, v127.h
+// W32: v_cmp_gt_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x09,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, 0xfe0b, v127.h
+// W64: v_cmp_gt_f16_e32 vcc, 0xfe0b, v127.h    ; encoding: [0xff,0xfe,0x09,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_f32 vcc_lo, v1, v2
@@ -3604,124 +3732,156 @@ v_cmp_gt_u64 vcc, 0xaf123456, v[254:255]
 // W64: v_cmp_gt_u64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xb9,0x7c,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2
-// W32: v_cmp_le_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x06,0x7c]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, v1.l, v2.l     ; encoding: [0x01,0x05,0x06,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v127, v2
-// W32: v_cmp_le_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x06,0x7c]
+v_cmp_le_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, v127.l, v2.l   ; encoding: [0x7f,0x05,0x06,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, s1, v2
-// W32: v_cmp_le_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, s1, v2.l       ; encoding: [0x01,0x04,0x06,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, s105, v2
-// W32: v_cmp_le_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, s105, v2.l     ; encoding: [0x69,0x04,0x06,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_le_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, vcc_lo, v2.l   ; encoding: [0x6a,0x04,0x06,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_le_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, vcc_hi, v2.l   ; encoding: [0x6b,0x04,0x06,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_le_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, ttmp15, v2.l   ; encoding: [0x7b,0x04,0x06,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, m0, v2
-// W32: v_cmp_le_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, m0, v2.l       ; encoding: [0x7d,0x04,0x06,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_le_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, exec_lo, v2.l  ; encoding: [0x7e,0x04,0x06,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_le_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, exec_hi, v2.l  ; encoding: [0x7f,0x04,0x06,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, null, v2
-// W32: v_cmp_le_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc_lo, null, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, null, v2.l     ; encoding: [0x7c,0x04,0x06,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, -1, v2
-// W32: v_cmp_le_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, -1, v2.l       ; encoding: [0xc1,0x04,0x06,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_le_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, 0.5, v2.l      ; encoding: [0xf0,0x04,0x06,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_le_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, src_scc, v2.l  ; encoding: [0xfd,0x04,0x06,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_le_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_le_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_le_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2
-// W64: v_cmp_le_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x06,0x7c]
+v_cmp_le_f16 vcc, v1.l, v2.l
+// W64: v_cmp_le_f16_e32 vcc, v1.l, v2.l        ; encoding: [0x01,0x05,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v127.l, v2.l
+// W64: v_cmp_le_f16_e32 vcc, v127.l, v2.l      ; encoding: [0x7f,0x05,0x06,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v127, v2
-// W64: v_cmp_le_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x06,0x7c]
+v_cmp_le_f16 vcc, s1, v2.l
+// W64: v_cmp_le_f16_e32 vcc, s1, v2.l          ; encoding: [0x01,0x04,0x06,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, s1, v2
-// W64: v_cmp_le_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc, s105, v2.l
+// W64: v_cmp_le_f16_e32 vcc, s105, v2.l        ; encoding: [0x69,0x04,0x06,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, s105, v2
-// W64: v_cmp_le_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_le_f16_e32 vcc, vcc_lo, v2.l      ; encoding: [0x6a,0x04,0x06,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, vcc_lo, v2
-// W64: v_cmp_le_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_le_f16_e32 vcc, vcc_hi, v2.l      ; encoding: [0x6b,0x04,0x06,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, vcc_hi, v2
-// W64: v_cmp_le_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_le_f16_e32 vcc, ttmp15, v2.l      ; encoding: [0x7b,0x04,0x06,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, ttmp15, v2
-// W64: v_cmp_le_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc, m0, v2.l
+// W64: v_cmp_le_f16_e32 vcc, m0, v2.l          ; encoding: [0x7d,0x04,0x06,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, m0, v2
-// W64: v_cmp_le_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_le_f16_e32 vcc, exec_lo, v2.l     ; encoding: [0x7e,0x04,0x06,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, exec_lo, v2
-// W64: v_cmp_le_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_le_f16_e32 vcc, exec_hi, v2.l     ; encoding: [0x7f,0x04,0x06,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, exec_hi, v2
-// W64: v_cmp_le_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc, null, v2.l
+// W64: v_cmp_le_f16_e32 vcc, null, v2.l        ; encoding: [0x7c,0x04,0x06,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, null, v2
-// W64: v_cmp_le_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc, -1, v2.l
+// W64: v_cmp_le_f16_e32 vcc, -1, v2.l          ; encoding: [0xc1,0x04,0x06,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, -1, v2
-// W64: v_cmp_le_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc, 0.5, v2.l
+// W64: v_cmp_le_f16_e32 vcc, 0.5, v2.l         ; encoding: [0xf0,0x04,0x06,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, 0.5, v2
-// W64: v_cmp_le_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc, src_scc, v2.l
+// W64: v_cmp_le_f16_e32 vcc, src_scc, v2.l     ; encoding: [0xfd,0x04,0x06,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, src_scc, v2
-// W64: v_cmp_le_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x06,0x7c]
+v_cmp_le_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_le_f16_e32 vcc, 0xfe0b, v127.l    ; encoding: [0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_le_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_le_f16 vcc_lo, v1.h, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, v1.h, v2.l     ; encoding: [0x81,0x05,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.h, v2.l
+// W64: v_cmp_le_f16_e32 vcc, v1.h, v2.l        ; encoding: [0x81,0x05,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v127.h, v2.l
+// W32: v_cmp_le_f16_e32 vcc_lo, v127.h, v2.l   ; encoding: [0xff,0x05,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v127.h, v2.l
+// W64: v_cmp_le_f16_e32 vcc, v127.h, v2.l      ; encoding: [0xff,0x05,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, src_scc, v2.h
+// W32: v_cmp_le_f16_e32 vcc_lo, src_scc, v2.h  ; encoding: [0xfd,0x04,0x07,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, src_scc, v2.h
+// W64: v_cmp_le_f16_e32 vcc, src_scc, v2.h     ; encoding: [0xfd,0x04,0x07,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, 0xfe0b, v127.h
+// W32: v_cmp_le_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x07,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, 0xfe0b, v127.h
+// W64: v_cmp_le_f16_e32 vcc, 0xfe0b, v127.h    ; encoding: [0xff,0xfe,0x07,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_f32 vcc_lo, v1, v2
@@ -4692,124 +4852,156 @@ v_cmp_le_u64 vcc, 0xaf123456, v[254:255]
 // W64: v_cmp_le_u64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xb7,0x7c,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2
-// W32: v_cmp_lg_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, v1.l, v2.l     ; encoding: [0x01,0x05,0x0a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v127, v2
-// W32: v_cmp_lg_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, v127.l, v2.l   ; encoding: [0x7f,0x05,0x0a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, s1, v2
-// W32: v_cmp_lg_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, s1, v2.l       ; encoding: [0x01,0x04,0x0a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, s105, v2
-// W32: v_cmp_lg_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, s105, v2.l     ; encoding: [0x69,0x04,0x0a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_lg_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, vcc_lo, v2.l   ; encoding: [0x6a,0x04,0x0a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_lg_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, vcc_hi, v2.l   ; encoding: [0x6b,0x04,0x0a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_lg_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, ttmp15, v2.l   ; encoding: [0x7b,0x04,0x0a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, m0, v2
-// W32: v_cmp_lg_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, m0, v2.l       ; encoding: [0x7d,0x04,0x0a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_lg_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, exec_lo, v2.l  ; encoding: [0x7e,0x04,0x0a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_lg_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, exec_hi, v2.l  ; encoding: [0x7f,0x04,0x0a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, null, v2
-// W32: v_cmp_lg_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, null, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, null, v2.l     ; encoding: [0x7c,0x04,0x0a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, -1, v2
-// W32: v_cmp_lg_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, -1, v2.l       ; encoding: [0xc1,0x04,0x0a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_lg_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, 0.5, v2.l      ; encoding: [0xf0,0x04,0x0a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_lg_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, src_scc, v2.l  ; encoding: [0xfd,0x04,0x0a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_lg_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_lg_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2
-// W64: v_cmp_lg_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x0a,0x7c]
+v_cmp_lg_f16 vcc, v1.l, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, v1.l, v2.l        ; encoding: [0x01,0x05,0x0a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v127, v2
-// W64: v_cmp_lg_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x0a,0x7c]
+v_cmp_lg_f16 vcc, v127.l, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, v127.l, v2.l      ; encoding: [0x7f,0x05,0x0a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, s1, v2
-// W64: v_cmp_lg_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc, s1, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, s1, v2.l          ; encoding: [0x01,0x04,0x0a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, s105, v2
-// W64: v_cmp_lg_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc, s105, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, s105, v2.l        ; encoding: [0x69,0x04,0x0a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, vcc_lo, v2
-// W64: v_cmp_lg_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, vcc_lo, v2.l      ; encoding: [0x6a,0x04,0x0a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, vcc_hi, v2
-// W64: v_cmp_lg_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, vcc_hi, v2.l      ; encoding: [0x6b,0x04,0x0a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, ttmp15, v2
-// W64: v_cmp_lg_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, ttmp15, v2.l      ; encoding: [0x7b,0x04,0x0a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, m0, v2
-// W64: v_cmp_lg_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc, m0, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, m0, v2.l          ; encoding: [0x7d,0x04,0x0a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, exec_lo, v2
-// W64: v_cmp_lg_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, exec_lo, v2.l     ; encoding: [0x7e,0x04,0x0a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, exec_hi, v2
-// W64: v_cmp_lg_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, exec_hi, v2.l     ; encoding: [0x7f,0x04,0x0a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, null, v2
-// W64: v_cmp_lg_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc, null, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, null, v2.l        ; encoding: [0x7c,0x04,0x0a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, -1, v2
-// W64: v_cmp_lg_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc, -1, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, -1, v2.l          ; encoding: [0xc1,0x04,0x0a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, 0.5, v2
-// W64: v_cmp_lg_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc, 0.5, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, 0.5, v2.l         ; encoding: [0xf0,0x04,0x0a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, src_scc, v2
-// W64: v_cmp_lg_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x0a,0x7c]
+v_cmp_lg_f16 vcc, src_scc, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, src_scc, v2.l     ; encoding: [0xfd,0x04,0x0a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_lg_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_lg_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_lg_f16_e32 vcc, 0xfe0b, v127.l    ; encoding: [0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1.h, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, v1.h, v2.l     ; encoding: [0x81,0x05,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.h, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, v1.h, v2.l        ; encoding: [0x81,0x05,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v127.h, v2.l
+// W32: v_cmp_lg_f16_e32 vcc_lo, v127.h, v2.l   ; encoding: [0xff,0x05,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v127.h, v2.l
+// W64: v_cmp_lg_f16_e32 vcc, v127.h, v2.l      ; encoding: [0xff,0x05,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, src_scc, v2.h
+// W32: v_cmp_lg_f16_e32 vcc_lo, src_scc, v2.h  ; encoding: [0xfd,0x04,0x0b,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, src_scc, v2.h
+// W64: v_cmp_lg_f16_e32 vcc, src_scc, v2.h     ; encoding: [0xfd,0x04,0x0b,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, 0xfe0b, v127.h
+// W32: v_cmp_lg_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x0b,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, 0xfe0b, v127.h
+// W64: v_cmp_lg_f16_e32 vcc, 0xfe0b, v127.h    ; encoding: [0xff,0xfe,0x0b,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lg_f32 vcc_lo, v1, v2
@@ -6900,124 +7092,160 @@ v_cmp_ne_u64 vcc, 0xaf123456, v[254:255]
 // W64: v_cmp_ne_u64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xbb,0x7c,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2
-// W32: v_cmp_neq_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x1a,0x7c]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, v1.l, v2.l    ; encoding: [0x01,0x05,0x1a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v127, v2
-// W32: v_cmp_neq_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x1a,0x7c]
+v_cmp_neq_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, v127.l, v2.l  ; encoding: [0x7f,0x05,0x1a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, s1, v2
-// W32: v_cmp_neq_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, s1, v2.l      ; encoding: [0x01,0x04,0x1a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, s105, v2
-// W32: v_cmp_neq_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, s105, v2.l    ; encoding: [0x69,0x04,0x1a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_neq_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, vcc_lo, v2.l  ; encoding: [0x6a,0x04,0x1a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_neq_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, vcc_hi, v2.l  ; encoding: [0x6b,0x04,0x1a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_neq_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, ttmp15, v2.l  ; encoding: [0x7b,0x04,0x1a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, m0, v2
-// W32: v_cmp_neq_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, m0, v2.l      ; encoding: [0x7d,0x04,0x1a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_neq_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, exec_lo, v2.l ; encoding: [0x7e,0x04,0x1a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_neq_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, exec_hi, v2.l ; encoding: [0x7f,0x04,0x1a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, null, v2
-// W32: v_cmp_neq_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc_lo, null, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, null, v2.l    ; encoding: [0x7c,0x04,0x1a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, -1, v2
-// W32: v_cmp_neq_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, -1, v2.l      ; encoding: [0xc1,0x04,0x1a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_neq_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, 0.5, v2.l     ; encoding: [0xf0,0x04,0x1a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_neq_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, src_scc, v2.l ; encoding: [0xfd,0x04,0x1a,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_neq_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_neq_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2
-// W64: v_cmp_neq_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x1a,0x7c]
+v_cmp_neq_f16 vcc, v1.l, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, v1.l, v2.l       ; encoding: [0x01,0x05,0x1a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v127, v2
-// W64: v_cmp_neq_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x1a,0x7c]
+v_cmp_neq_f16 vcc, v127.l, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, v127.l, v2.l     ; encoding: [0x7f,0x05,0x1a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, s1, v2
-// W64: v_cmp_neq_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc, s1, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, s1, v2.l         ; encoding: [0x01,0x04,0x1a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, s105, v2
-// W64: v_cmp_neq_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc, s105, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, s105, v2.l       ; encoding: [0x69,0x04,0x1a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, vcc_lo, v2
-// W64: v_cmp_neq_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, vcc_lo, v2.l     ; encoding: [0x6a,0x04,0x1a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, vcc_hi, v2
-// W64: v_cmp_neq_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, vcc_hi, v2.l     ; encoding: [0x6b,0x04,0x1a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, ttmp15, v2
-// W64: v_cmp_neq_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, ttmp15, v2.l     ; encoding: [0x7b,0x04,0x1a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, m0, v2
-// W64: v_cmp_neq_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc, m0, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, m0, v2.l         ; encoding: [0x7d,0x04,0x1a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, exec_lo, v2
-// W64: v_cmp_neq_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, exec_lo, v2.l    ; encoding: [0x7e,0x04,0x1a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, exec_hi, v2
-// W64: v_cmp_neq_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, exec_hi, v2.l    ; encoding: [0x7f,0x04,0x1a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, null, v2
-// W64: v_cmp_neq_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc, null, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, null, v2.l       ; encoding: [0x7c,0x04,0x1a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, -1, v2
-// W64: v_cmp_neq_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc, -1, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, -1, v2.l         ; encoding: [0xc1,0x04,0x1a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, 0.5, v2
-// W64: v_cmp_neq_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc, 0.5, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, 0.5, v2.l        ; encoding: [0xf0,0x04,0x1a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, src_scc, v2
-// W64: v_cmp_neq_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x1a,0x7c]
+v_cmp_neq_f16 vcc, src_scc, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, src_scc, v2.l    ; encoding: [0xfd,0x04,0x1a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_neq_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_neq_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_neq_f16_e32 vcc, 0xfe0b, v127.l   ; encoding: [0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1.h, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, v1.h, v2.l    ; encoding: [0x81,0x05,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.h, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, v1.h, v2.l       ; encoding: [0x81,0x05,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v127.h, v2.l
+// W32: v_cmp_neq_f16_e32 vcc_lo, v127.h, v2.l  ; encoding: [0xff,0x05,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v127.h, v2.l
+// W64: v_cmp_neq_f16_e32 vcc, v127.h, v2.l     ; encoding: [0xff,0x05,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, src_scc, v2.h
+// W32: v_cmp_neq_f16_e32 vcc_lo, src_scc, v2.h ; encoding: [0xfd,0x04,0x1b,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, src_scc, v2.h
+// W64: v_cmp_neq_f16_e32 vcc, src_scc, v2.h    ; encoding: [0xfd,0x04,0x1b,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, 0xfe0b, v127.h
+// W32: v_cmp_neq_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x1b,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, 0xfe0b, v127.h
+// W64: v_cmp_neq_f16_e32 vcc, 0xfe0b, v127.h   ; encoding: [0xff,0xfe,0x1b,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, 0.5, v127.l
+// W64: v_cmp_neq_f16_e32 vcc, 0.5, v127.l      ; encoding: [0xf0,0xfe,0x1a,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f32 vcc_lo, v1, v2
@@ -7236,124 +7464,124 @@ v_cmp_neq_f64 vcc, 0xaf123456, v[254:255]
 // W64: v_cmp_neq_f64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x5b,0x7c,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2
-// W32: v_cmp_nge_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, v1.l, v2.l    ; encoding: [0x01,0x05,0x12,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v127, v2
-// W32: v_cmp_nge_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, v127.l, v2.l  ; encoding: [0x7f,0x05,0x12,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, s1, v2
-// W32: v_cmp_nge_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, s1, v2.l      ; encoding: [0x01,0x04,0x12,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, s105, v2
-// W32: v_cmp_nge_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, s105, v2.l    ; encoding: [0x69,0x04,0x12,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_nge_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, vcc_lo, v2.l  ; encoding: [0x6a,0x04,0x12,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_nge_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, vcc_hi, v2.l  ; encoding: [0x6b,0x04,0x12,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_nge_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, ttmp15, v2.l  ; encoding: [0x7b,0x04,0x12,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, m0, v2
-// W32: v_cmp_nge_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, m0, v2.l      ; encoding: [0x7d,0x04,0x12,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_nge_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, exec_lo, v2.l ; encoding: [0x7e,0x04,0x12,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_nge_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, exec_hi, v2.l ; encoding: [0x7f,0x04,0x12,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, null, v2
-// W32: v_cmp_nge_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, null, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, null, v2.l    ; encoding: [0x7c,0x04,0x12,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, -1, v2
-// W32: v_cmp_nge_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, -1, v2.l      ; encoding: [0xc1,0x04,0x12,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_nge_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, 0.5, v2.l     ; encoding: [0xf0,0x04,0x12,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_nge_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, src_scc, v2.l ; encoding: [0xfd,0x04,0x12,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_nge_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_nge_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_nge_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2
-// W64: v_cmp_nge_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x12,0x7c]
+v_cmp_nge_f16 vcc, v1.l, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, v1.l, v2.l       ; encoding: [0x01,0x05,0x12,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v127, v2
-// W64: v_cmp_nge_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x12,0x7c]
+v_cmp_nge_f16 vcc, v127.l, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, v127.l, v2.l     ; encoding: [0x7f,0x05,0x12,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, s1, v2
-// W64: v_cmp_nge_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc, s1, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, s1, v2.l         ; encoding: [0x01,0x04,0x12,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, s105, v2
-// W64: v_cmp_nge_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc, s105, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, s105, v2.l       ; encoding: [0x69,0x04,0x12,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, vcc_lo, v2
-// W64: v_cmp_nge_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, vcc_lo, v2.l     ; encoding: [0x6a,0x04,0x12,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, vcc_hi, v2
-// W64: v_cmp_nge_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, vcc_hi, v2.l     ; encoding: [0x6b,0x04,0x12,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, ttmp15, v2
-// W64: v_cmp_nge_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, ttmp15, v2.l     ; encoding: [0x7b,0x04,0x12,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, m0, v2
-// W64: v_cmp_nge_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc, m0, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, m0, v2.l         ; encoding: [0x7d,0x04,0x12,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, exec_lo, v2
-// W64: v_cmp_nge_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, exec_lo, v2.l    ; encoding: [0x7e,0x04,0x12,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, exec_hi, v2
-// W64: v_cmp_nge_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, exec_hi, v2.l    ; encoding: [0x7f,0x04,0x12,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, null, v2
-// W64: v_cmp_nge_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc, null, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, null, v2.l       ; encoding: [0x7c,0x04,0x12,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, -1, v2
-// W64: v_cmp_nge_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc, -1, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, -1, v2.l         ; encoding: [0xc1,0x04,0x12,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, 0.5, v2
-// W64: v_cmp_nge_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc, 0.5, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, 0.5, v2.l        ; encoding: [0xf0,0x04,0x12,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, src_scc, v2
-// W64: v_cmp_nge_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x12,0x7c]
+v_cmp_nge_f16 vcc, src_scc, v2.l
+// W64: v_cmp_nge_f16_e32 vcc, src_scc, v2.l    ; encoding: [0xfd,0x04,0x12,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_nge_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_nge_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_nge_f16_e32 vcc, 0xfe0b, v127.l   ; encoding: [0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f32 vcc_lo, v1, v2
@@ -7572,124 +7800,124 @@ v_cmp_nge_f64 vcc, 0xaf123456, v[254:255]
 // W64: v_cmp_nge_f64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x53,0x7c,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2
-// W32: v_cmp_ngt_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x16,0x7c]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, v1.l, v2.l    ; encoding: [0x01,0x05,0x16,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v127, v2
-// W32: v_cmp_ngt_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x16,0x7c]
+v_cmp_ngt_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, v127.l, v2.l  ; encoding: [0x7f,0x05,0x16,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, s1, v2
-// W32: v_cmp_ngt_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, s1, v2.l      ; encoding: [0x01,0x04,0x16,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, s105, v2
-// W32: v_cmp_ngt_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, s105, v2.l    ; encoding: [0x69,0x04,0x16,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_ngt_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, vcc_lo, v2.l  ; encoding: [0x6a,0x04,0x16,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_ngt_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, vcc_hi, v2.l  ; encoding: [0x6b,0x04,0x16,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_ngt_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, ttmp15, v2.l  ; encoding: [0x7b,0x04,0x16,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, m0, v2
-// W32: v_cmp_ngt_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, m0, v2.l      ; encoding: [0x7d,0x04,0x16,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_ngt_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, exec_lo, v2.l ; encoding: [0x7e,0x04,0x16,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_ngt_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, exec_hi, v2.l ; encoding: [0x7f,0x04,0x16,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, null, v2
-// W32: v_cmp_ngt_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc_lo, null, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, null, v2.l    ; encoding: [0x7c,0x04,0x16,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, -1, v2
-// W32: v_cmp_ngt_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, -1, v2.l      ; encoding: [0xc1,0x04,0x16,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v2.l     ; encoding: [0xf0,0x04,0x16,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_ngt_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, src_scc, v2.l ; encoding: [0xfd,0x04,0x16,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_ngt_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_ngt_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_ngt_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2
-// W64: v_cmp_ngt_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x16,0x7c]
+v_cmp_ngt_f16 vcc, v1.l, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, v1.l, v2.l       ; encoding: [0x01,0x05,0x16,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v127, v2
-// W64: v_cmp_ngt_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x16,0x7c]
+v_cmp_ngt_f16 vcc, v127.l, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, v127.l, v2.l     ; encoding: [0x7f,0x05,0x16,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, s1, v2
-// W64: v_cmp_ngt_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc, s1, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, s1, v2.l         ; encoding: [0x01,0x04,0x16,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, s105, v2
-// W64: v_cmp_ngt_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc, s105, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, s105, v2.l       ; encoding: [0x69,0x04,0x16,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, vcc_lo, v2
-// W64: v_cmp_ngt_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, vcc_lo, v2.l     ; encoding: [0x6a,0x04,0x16,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, vcc_hi, v2
-// W64: v_cmp_ngt_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, vcc_hi, v2.l     ; encoding: [0x6b,0x04,0x16,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, ttmp15, v2
-// W64: v_cmp_ngt_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, ttmp15, v2.l     ; encoding: [0x7b,0x04,0x16,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, m0, v2
-// W64: v_cmp_ngt_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc, m0, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, m0, v2.l         ; encoding: [0x7d,0x04,0x16,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, exec_lo, v2
-// W64: v_cmp_ngt_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, exec_lo, v2.l    ; encoding: [0x7e,0x04,0x16,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, exec_hi, v2
-// W64: v_cmp_ngt_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, exec_hi, v2.l    ; encoding: [0x7f,0x04,0x16,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, null, v2
-// W64: v_cmp_ngt_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc, null, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, null, v2.l       ; encoding: [0x7c,0x04,0x16,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, -1, v2
-// W64: v_cmp_ngt_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc, -1, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, -1, v2.l         ; encoding: [0xc1,0x04,0x16,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, 0.5, v2
-// W64: v_cmp_ngt_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc, 0.5, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, 0.5, v2.l        ; encoding: [0xf0,0x04,0x16,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, src_scc, v2
-// W64: v_cmp_ngt_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x16,0x7c]
+v_cmp_ngt_f16 vcc, src_scc, v2.l
+// W64: v_cmp_ngt_f16_e32 vcc, src_scc, v2.l    ; encoding: [0xfd,0x04,0x16,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_ngt_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_ngt_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_ngt_f16_e32 vcc, 0xfe0b, v127.l   ; encoding: [0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f32 vcc_lo, v1, v2
@@ -7908,124 +8136,124 @@ v_cmp_ngt_f64 vcc, 0xaf123456, v[254:255]
 // W64: v_cmp_ngt_f64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x57,0x7c,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2
-// W32: v_cmp_nle_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, v1.l, v2.l    ; encoding: [0x01,0x05,0x18,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v127, v2
-// W32: v_cmp_nle_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, v127.l, v2.l  ; encoding: [0x7f,0x05,0x18,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, s1, v2
-// W32: v_cmp_nle_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, s1, v2.l      ; encoding: [0x01,0x04,0x18,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, s105, v2
-// W32: v_cmp_nle_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, s105, v2.l    ; encoding: [0x69,0x04,0x18,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_nle_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, vcc_lo, v2.l  ; encoding: [0x6a,0x04,0x18,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_nle_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, vcc_hi, v2.l  ; encoding: [0x6b,0x04,0x18,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_nle_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, ttmp15, v2.l  ; encoding: [0x7b,0x04,0x18,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, m0, v2
-// W32: v_cmp_nle_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, m0, v2.l      ; encoding: [0x7d,0x04,0x18,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_nle_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, exec_lo, v2.l ; encoding: [0x7e,0x04,0x18,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_nle_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, exec_hi, v2.l ; encoding: [0x7f,0x04,0x18,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, null, v2
-// W32: v_cmp_nle_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, null, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, null, v2.l    ; encoding: [0x7c,0x04,0x18,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, -1, v2
-// W32: v_cmp_nle_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, -1, v2.l      ; encoding: [0xc1,0x04,0x18,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_nle_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, 0.5, v2.l     ; encoding: [0xf0,0x04,0x18,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_nle_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, src_scc, v2.l ; encoding: [0xfd,0x04,0x18,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_nle_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_nle_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_nle_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2
-// W64: v_cmp_nle_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x18,0x7c]
+v_cmp_nle_f16 vcc, v1.l, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, v1.l, v2.l       ; encoding: [0x01,0x05,0x18,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v127, v2
-// W64: v_cmp_nle_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x18,0x7c]
+v_cmp_nle_f16 vcc, v127.l, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, v127.l, v2.l     ; encoding: [0x7f,0x05,0x18,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, s1, v2
-// W64: v_cmp_nle_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc, s1, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, s1, v2.l         ; encoding: [0x01,0x04,0x18,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, s105, v2
-// W64: v_cmp_nle_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc, s105, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, s105, v2.l       ; encoding: [0x69,0x04,0x18,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, vcc_lo, v2
-// W64: v_cmp_nle_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, vcc_lo, v2.l     ; encoding: [0x6a,0x04,0x18,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, vcc_hi, v2
-// W64: v_cmp_nle_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, vcc_hi, v2.l     ; encoding: [0x6b,0x04,0x18,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, ttmp15, v2
-// W64: v_cmp_nle_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, ttmp15, v2.l     ; encoding: [0x7b,0x04,0x18,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, m0, v2
-// W64: v_cmp_nle_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc, m0, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, m0, v2.l         ; encoding: [0x7d,0x04,0x18,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, exec_lo, v2
-// W64: v_cmp_nle_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, exec_lo, v2.l    ; encoding: [0x7e,0x04,0x18,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, exec_hi, v2
-// W64: v_cmp_nle_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, exec_hi, v2.l    ; encoding: [0x7f,0x04,0x18,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, null, v2
-// W64: v_cmp_nle_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc, null, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, null, v2.l       ; encoding: [0x7c,0x04,0x18,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, -1, v2
-// W64: v_cmp_nle_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc, -1, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, -1, v2.l         ; encoding: [0xc1,0x04,0x18,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, 0.5, v2
-// W64: v_cmp_nle_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc, 0.5, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, 0.5, v2.l        ; encoding: [0xf0,0x04,0x18,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, src_scc, v2
-// W64: v_cmp_nle_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x18,0x7c]
+v_cmp_nle_f16 vcc, src_scc, v2.l
+// W64: v_cmp_nle_f16_e32 vcc, src_scc, v2.l    ; encoding: [0xfd,0x04,0x18,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_nle_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_nle_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_nle_f16_e32 vcc, 0xfe0b, v127.l   ; encoding: [0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f32 vcc_lo, v1, v2
@@ -8244,124 +8472,124 @@ v_cmp_nle_f64 vcc, 0xaf123456, v[254:255]
 // W64: v_cmp_nle_f64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x59,0x7c,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2
-// W32: v_cmp_nlg_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, v1.l, v2.l    ; encoding: [0x01,0x05,0x14,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v127, v2
-// W32: v_cmp_nlg_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, v127.l, v2.l  ; encoding: [0x7f,0x05,0x14,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, s1, v2
-// W32: v_cmp_nlg_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, s1, v2.l      ; encoding: [0x01,0x04,0x14,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, s105, v2
-// W32: v_cmp_nlg_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, s105, v2.l    ; encoding: [0x69,0x04,0x14,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_nlg_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, vcc_lo, v2.l  ; encoding: [0x6a,0x04,0x14,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_nlg_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, vcc_hi, v2.l  ; encoding: [0x6b,0x04,0x14,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_nlg_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, ttmp15, v2.l  ; encoding: [0x7b,0x04,0x14,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, m0, v2
-// W32: v_cmp_nlg_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, m0, v2.l      ; encoding: [0x7d,0x04,0x14,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_nlg_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, exec_lo, v2.l ; encoding: [0x7e,0x04,0x14,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_nlg_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, exec_hi, v2.l ; encoding: [0x7f,0x04,0x14,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, null, v2
-// W32: v_cmp_nlg_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, null, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, null, v2.l    ; encoding: [0x7c,0x04,0x14,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, -1, v2
-// W32: v_cmp_nlg_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, -1, v2.l      ; encoding: [0xc1,0x04,0x14,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_nlg_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, 0.5, v2.l     ; encoding: [0xf0,0x04,0x14,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_nlg_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, src_scc, v2.l ; encoding: [0xfd,0x04,0x14,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_nlg_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_nlg_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_nlg_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2
-// W64: v_cmp_nlg_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x14,0x7c]
+v_cmp_nlg_f16 vcc, v1.l, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, v1.l, v2.l       ; encoding: [0x01,0x05,0x14,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v127, v2
-// W64: v_cmp_nlg_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x14,0x7c]
+v_cmp_nlg_f16 vcc, v127.l, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, v127.l, v2.l     ; encoding: [0x7f,0x05,0x14,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, s1, v2
-// W64: v_cmp_nlg_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc, s1, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, s1, v2.l         ; encoding: [0x01,0x04,0x14,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, s105, v2
-// W64: v_cmp_nlg_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc, s105, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, s105, v2.l       ; encoding: [0x69,0x04,0x14,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, vcc_lo, v2
-// W64: v_cmp_nlg_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, vcc_lo, v2.l     ; encoding: [0x6a,0x04,0x14,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, vcc_hi, v2
-// W64: v_cmp_nlg_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, vcc_hi, v2.l     ; encoding: [0x6b,0x04,0x14,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, ttmp15, v2
-// W64: v_cmp_nlg_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, ttmp15, v2.l     ; encoding: [0x7b,0x04,0x14,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, m0, v2
-// W64: v_cmp_nlg_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc, m0, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, m0, v2.l         ; encoding: [0x7d,0x04,0x14,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, exec_lo, v2
-// W64: v_cmp_nlg_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, exec_lo, v2.l    ; encoding: [0x7e,0x04,0x14,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, exec_hi, v2
-// W64: v_cmp_nlg_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, exec_hi, v2.l    ; encoding: [0x7f,0x04,0x14,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, null, v2
-// W64: v_cmp_nlg_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc, null, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, null, v2.l       ; encoding: [0x7c,0x04,0x14,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, -1, v2
-// W64: v_cmp_nlg_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc, -1, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, -1, v2.l         ; encoding: [0xc1,0x04,0x14,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, 0.5, v2
-// W64: v_cmp_nlg_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc, 0.5, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, 0.5, v2.l        ; encoding: [0xf0,0x04,0x14,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, src_scc, v2
-// W64: v_cmp_nlg_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x14,0x7c]
+v_cmp_nlg_f16 vcc, src_scc, v2.l
+// W64: v_cmp_nlg_f16_e32 vcc, src_scc, v2.l    ; encoding: [0xfd,0x04,0x14,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_nlg_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_nlg_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_nlg_f16_e32 vcc, 0xfe0b, v127.l   ; encoding: [0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f32 vcc_lo, v1, v2
@@ -8580,124 +8808,124 @@ v_cmp_nlg_f64 vcc, 0xaf123456, v[254:255]
 // W64: v_cmp_nlg_f64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x55,0x7c,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2
-// W32: v_cmp_nlt_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x1c,0x7c]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, v1.l, v2.l    ; encoding: [0x01,0x05,0x1c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v127, v2
-// W32: v_cmp_nlt_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x1c,0x7c]
+v_cmp_nlt_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, v127.l, v2.l  ; encoding: [0x7f,0x05,0x1c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, s1, v2
-// W32: v_cmp_nlt_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, s1, v2.l      ; encoding: [0x01,0x04,0x1c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, s105, v2
-// W32: v_cmp_nlt_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, s105, v2.l    ; encoding: [0x69,0x04,0x1c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_nlt_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, vcc_lo, v2.l  ; encoding: [0x6a,0x04,0x1c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_nlt_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, vcc_hi, v2.l  ; encoding: [0x6b,0x04,0x1c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_nlt_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, ttmp15, v2.l  ; encoding: [0x7b,0x04,0x1c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, m0, v2
-// W32: v_cmp_nlt_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, m0, v2.l      ; encoding: [0x7d,0x04,0x1c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_nlt_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, exec_lo, v2.l ; encoding: [0x7e,0x04,0x1c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_nlt_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, exec_hi, v2.l ; encoding: [0x7f,0x04,0x1c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, null, v2
-// W32: v_cmp_nlt_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc_lo, null, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, null, v2.l    ; encoding: [0x7c,0x04,0x1c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, -1, v2
-// W32: v_cmp_nlt_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, -1, v2.l      ; encoding: [0xc1,0x04,0x1c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v2.l     ; encoding: [0xf0,0x04,0x1c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_nlt_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, src_scc, v2.l ; encoding: [0xfd,0x04,0x1c,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_nlt_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_nlt_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_nlt_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2
-// W64: v_cmp_nlt_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x1c,0x7c]
+v_cmp_nlt_f16 vcc, v1.l, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, v1.l, v2.l       ; encoding: [0x01,0x05,0x1c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v127, v2
-// W64: v_cmp_nlt_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x1c,0x7c]
+v_cmp_nlt_f16 vcc, v127.l, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, v127.l, v2.l     ; encoding: [0x7f,0x05,0x1c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, s1, v2
-// W64: v_cmp_nlt_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc, s1, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, s1, v2.l         ; encoding: [0x01,0x04,0x1c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, s105, v2
-// W64: v_cmp_nlt_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc, s105, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, s105, v2.l       ; encoding: [0x69,0x04,0x1c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, vcc_lo, v2
-// W64: v_cmp_nlt_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, vcc_lo, v2.l     ; encoding: [0x6a,0x04,0x1c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, vcc_hi, v2
-// W64: v_cmp_nlt_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, vcc_hi, v2.l     ; encoding: [0x6b,0x04,0x1c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, ttmp15, v2
-// W64: v_cmp_nlt_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, ttmp15, v2.l     ; encoding: [0x7b,0x04,0x1c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, m0, v2
-// W64: v_cmp_nlt_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc, m0, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, m0, v2.l         ; encoding: [0x7d,0x04,0x1c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, exec_lo, v2
-// W64: v_cmp_nlt_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, exec_lo, v2.l    ; encoding: [0x7e,0x04,0x1c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, exec_hi, v2
-// W64: v_cmp_nlt_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, exec_hi, v2.l    ; encoding: [0x7f,0x04,0x1c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, null, v2
-// W64: v_cmp_nlt_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc, null, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, null, v2.l       ; encoding: [0x7c,0x04,0x1c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, -1, v2
-// W64: v_cmp_nlt_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc, -1, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, -1, v2.l         ; encoding: [0xc1,0x04,0x1c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, 0.5, v2
-// W64: v_cmp_nlt_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc, 0.5, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, 0.5, v2.l        ; encoding: [0xf0,0x04,0x1c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, src_scc, v2
-// W64: v_cmp_nlt_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x1c,0x7c]
+v_cmp_nlt_f16 vcc, src_scc, v2.l
+// W64: v_cmp_nlt_f16_e32 vcc, src_scc, v2.l    ; encoding: [0xfd,0x04,0x1c,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_nlt_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_nlt_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_nlt_f16_e32 vcc, 0xfe0b, v127.l   ; encoding: [0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f32 vcc_lo, v1, v2
@@ -8916,124 +9144,124 @@ v_cmp_nlt_f64 vcc, 0xaf123456, v[254:255]
 // W64: v_cmp_nlt_f64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x5d,0x7c,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2
-// W32: v_cmp_o_f16_e32 vcc_lo, v1, v2          ; encoding: [0x01,0x05,0x0e,0x7c]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, v1.l, v2.l      ; encoding: [0x01,0x05,0x0e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v127, v2
-// W32: v_cmp_o_f16_e32 vcc_lo, v127, v2        ; encoding: [0x7f,0x05,0x0e,0x7c]
+v_cmp_o_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, v127.l, v2.l    ; encoding: [0x7f,0x05,0x0e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, s1, v2
-// W32: v_cmp_o_f16_e32 vcc_lo, s1, v2          ; encoding: [0x01,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, s1, v2.l        ; encoding: [0x01,0x04,0x0e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, s105, v2
-// W32: v_cmp_o_f16_e32 vcc_lo, s105, v2        ; encoding: [0x69,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, s105, v2.l      ; encoding: [0x69,0x04,0x0e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_o_f16_e32 vcc_lo, vcc_lo, v2      ; encoding: [0x6a,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, vcc_lo, v2.l    ; encoding: [0x6a,0x04,0x0e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_o_f16_e32 vcc_lo, vcc_hi, v2      ; encoding: [0x6b,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, vcc_hi, v2.l    ; encoding: [0x6b,0x04,0x0e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_o_f16_e32 vcc_lo, ttmp15, v2      ; encoding: [0x7b,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, ttmp15, v2.l    ; encoding: [0x7b,0x04,0x0e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, m0, v2
-// W32: v_cmp_o_f16_e32 vcc_lo, m0, v2          ; encoding: [0x7d,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, m0, v2.l        ; encoding: [0x7d,0x04,0x0e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_o_f16_e32 vcc_lo, exec_lo, v2     ; encoding: [0x7e,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, exec_lo, v2.l   ; encoding: [0x7e,0x04,0x0e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_o_f16_e32 vcc_lo, exec_hi, v2     ; encoding: [0x7f,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, exec_hi, v2.l   ; encoding: [0x7f,0x04,0x0e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, null, v2
-// W32: v_cmp_o_f16_e32 vcc_lo, null, v2        ; encoding: [0x7c,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc_lo, null, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, null, v2.l      ; encoding: [0x7c,0x04,0x0e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, -1, v2
-// W32: v_cmp_o_f16_e32 vcc_lo, -1, v2          ; encoding: [0xc1,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, -1, v2.l        ; encoding: [0xc1,0x04,0x0e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_o_f16_e32 vcc_lo, 0.5, v2         ; encoding: [0xf0,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, 0.5, v2.l       ; encoding: [0xf0,0x04,0x0e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_o_f16_e32 vcc_lo, src_scc, v2     ; encoding: [0xfd,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_o_f16_e32 vcc_lo, src_scc, v2.l   ; encoding: [0xfd,0x04,0x0e,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_o_f16_e32 vcc_lo, 0xfe0b, v127    ; encoding: [0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_o_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_o_f16_e32 vcc_lo, 0xfe0b, v127.l  ; encoding: [0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2
-// W64: v_cmp_o_f16_e32 vcc, v1, v2             ; encoding: [0x01,0x05,0x0e,0x7c]
+v_cmp_o_f16 vcc, v1.l, v2.l
+// W64: v_cmp_o_f16_e32 vcc, v1.l, v2.l         ; encoding: [0x01,0x05,0x0e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v127, v2
-// W64: v_cmp_o_f16_e32 vcc, v127, v2           ; encoding: [0x7f,0x05,0x0e,0x7c]
+v_cmp_o_f16 vcc, v127.l, v2.l
+// W64: v_cmp_o_f16_e32 vcc, v127.l, v2.l       ; encoding: [0x7f,0x05,0x0e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, s1, v2
-// W64: v_cmp_o_f16_e32 vcc, s1, v2             ; encoding: [0x01,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc, s1, v2.l
+// W64: v_cmp_o_f16_e32 vcc, s1, v2.l           ; encoding: [0x01,0x04,0x0e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, s105, v2
-// W64: v_cmp_o_f16_e32 vcc, s105, v2           ; encoding: [0x69,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc, s105, v2.l
+// W64: v_cmp_o_f16_e32 vcc, s105, v2.l         ; encoding: [0x69,0x04,0x0e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, vcc_lo, v2
-// W64: v_cmp_o_f16_e32 vcc, vcc_lo, v2         ; encoding: [0x6a,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_o_f16_e32 vcc, vcc_lo, v2.l       ; encoding: [0x6a,0x04,0x0e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, vcc_hi, v2
-// W64: v_cmp_o_f16_e32 vcc, vcc_hi, v2         ; encoding: [0x6b,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_o_f16_e32 vcc, vcc_hi, v2.l       ; encoding: [0x6b,0x04,0x0e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, ttmp15, v2
-// W64: v_cmp_o_f16_e32 vcc, ttmp15, v2         ; encoding: [0x7b,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_o_f16_e32 vcc, ttmp15, v2.l       ; encoding: [0x7b,0x04,0x0e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, m0, v2
-// W64: v_cmp_o_f16_e32 vcc, m0, v2             ; encoding: [0x7d,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc, m0, v2.l
+// W64: v_cmp_o_f16_e32 vcc, m0, v2.l           ; encoding: [0x7d,0x04,0x0e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, exec_lo, v2
-// W64: v_cmp_o_f16_e32 vcc, exec_lo, v2        ; encoding: [0x7e,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_o_f16_e32 vcc, exec_lo, v2.l      ; encoding: [0x7e,0x04,0x0e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, exec_hi, v2
-// W64: v_cmp_o_f16_e32 vcc, exec_hi, v2        ; encoding: [0x7f,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_o_f16_e32 vcc, exec_hi, v2.l      ; encoding: [0x7f,0x04,0x0e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, null, v2
-// W64: v_cmp_o_f16_e32 vcc, null, v2           ; encoding: [0x7c,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc, null, v2.l
+// W64: v_cmp_o_f16_e32 vcc, null, v2.l         ; encoding: [0x7c,0x04,0x0e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, -1, v2
-// W64: v_cmp_o_f16_e32 vcc, -1, v2             ; encoding: [0xc1,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc, -1, v2.l
+// W64: v_cmp_o_f16_e32 vcc, -1, v2.l           ; encoding: [0xc1,0x04,0x0e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, 0.5, v2
-// W64: v_cmp_o_f16_e32 vcc, 0.5, v2            ; encoding: [0xf0,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc, 0.5, v2.l
+// W64: v_cmp_o_f16_e32 vcc, 0.5, v2.l          ; encoding: [0xf0,0x04,0x0e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, src_scc, v2
-// W64: v_cmp_o_f16_e32 vcc, src_scc, v2        ; encoding: [0xfd,0x04,0x0e,0x7c]
+v_cmp_o_f16 vcc, src_scc, v2.l
+// W64: v_cmp_o_f16_e32 vcc, src_scc, v2.l      ; encoding: [0xfd,0x04,0x0e,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_o_f16_e32 vcc, 0xfe0b, v127       ; encoding: [0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_o_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_o_f16_e32 vcc, 0xfe0b, v127.l     ; encoding: [0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_o_f32 vcc_lo, v1, v2
@@ -9252,124 +9480,156 @@ v_cmp_o_f64 vcc, 0xaf123456, v[254:255]
 // W64: v_cmp_o_f64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x4f,0x7c,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2
-// W32: v_cmp_u_f16_e32 vcc_lo, v1, v2          ; encoding: [0x01,0x05,0x10,0x7c]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, v1.l, v2.l      ; encoding: [0x01,0x05,0x10,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v127, v2
-// W32: v_cmp_u_f16_e32 vcc_lo, v127, v2        ; encoding: [0x7f,0x05,0x10,0x7c]
+v_cmp_u_f16 vcc_lo, v127.l, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, v127.l, v2.l    ; encoding: [0x7f,0x05,0x10,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, s1, v2
-// W32: v_cmp_u_f16_e32 vcc_lo, s1, v2          ; encoding: [0x01,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc_lo, s1, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, s1, v2.l        ; encoding: [0x01,0x04,0x10,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, s105, v2
-// W32: v_cmp_u_f16_e32 vcc_lo, s105, v2        ; encoding: [0x69,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc_lo, s105, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, s105, v2.l      ; encoding: [0x69,0x04,0x10,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, vcc_lo, v2
-// W32: v_cmp_u_f16_e32 vcc_lo, vcc_lo, v2      ; encoding: [0x6a,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc_lo, vcc_lo, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, vcc_lo, v2.l    ; encoding: [0x6a,0x04,0x10,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, vcc_hi, v2
-// W32: v_cmp_u_f16_e32 vcc_lo, vcc_hi, v2      ; encoding: [0x6b,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc_lo, vcc_hi, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, vcc_hi, v2.l    ; encoding: [0x6b,0x04,0x10,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, ttmp15, v2
-// W32: v_cmp_u_f16_e32 vcc_lo, ttmp15, v2      ; encoding: [0x7b,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc_lo, ttmp15, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, ttmp15, v2.l    ; encoding: [0x7b,0x04,0x10,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, m0, v2
-// W32: v_cmp_u_f16_e32 vcc_lo, m0, v2          ; encoding: [0x7d,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc_lo, m0, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, m0, v2.l        ; encoding: [0x7d,0x04,0x10,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, exec_lo, v2
-// W32: v_cmp_u_f16_e32 vcc_lo, exec_lo, v2     ; encoding: [0x7e,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc_lo, exec_lo, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, exec_lo, v2.l   ; encoding: [0x7e,0x04,0x10,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, exec_hi, v2
-// W32: v_cmp_u_f16_e32 vcc_lo, exec_hi, v2     ; encoding: [0x7f,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc_lo, exec_hi, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, exec_hi, v2.l   ; encoding: [0x7f,0x04,0x10,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, null, v2
-// W32: v_cmp_u_f16_e32 vcc_lo, null, v2        ; encoding: [0x7c,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc_lo, null, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, null, v2.l      ; encoding: [0x7c,0x04,0x10,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, -1, v2
-// W32: v_cmp_u_f16_e32 vcc_lo, -1, v2          ; encoding: [0xc1,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc_lo, -1, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, -1, v2.l        ; encoding: [0xc1,0x04,0x10,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, 0.5, v2
-// W32: v_cmp_u_f16_e32 vcc_lo, 0.5, v2         ; encoding: [0xf0,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc_lo, 0.5, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, 0.5, v2.l       ; encoding: [0xf0,0x04,0x10,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, src_scc, v2
-// W32: v_cmp_u_f16_e32 vcc_lo, src_scc, v2     ; encoding: [0xfd,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc_lo, src_scc, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, src_scc, v2.l   ; encoding: [0xfd,0x04,0x10,0x7c]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, 0xfe0b, v127
-// W32: v_cmp_u_f16_e32 vcc_lo, 0xfe0b, v127    ; encoding: [0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_u_f16 vcc_lo, 0xfe0b, v127.l
+// W32: v_cmp_u_f16_e32 vcc_lo, 0xfe0b, v127.l  ; encoding: [0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2
-// W64: v_cmp_u_f16_e32 vcc, v1, v2             ; encoding: [0x01,0x05,0x10,0x7c]
+v_cmp_u_f16 vcc, v1.l, v2.l
+// W64: v_cmp_u_f16_e32 vcc, v1.l, v2.l         ; encoding: [0x01,0x05,0x10,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v127, v2
-// W64: v_cmp_u_f16_e32 vcc, v127, v2           ; encoding: [0x7f,0x05,0x10,0x7c]
+v_cmp_u_f16 vcc, v127.l, v2.l
+// W64: v_cmp_u_f16_e32 vcc, v127.l, v2.l       ; encoding: [0x7f,0x05,0x10,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, s1, v2
-// W64: v_cmp_u_f16_e32 vcc, s1, v2             ; encoding: [0x01,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc, s1, v2.l
+// W64: v_cmp_u_f16_e32 vcc, s1, v2.l           ; encoding: [0x01,0x04,0x10,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, s105, v2
-// W64: v_cmp_u_f16_e32 vcc, s105, v2           ; encoding: [0x69,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc, s105, v2.l
+// W64: v_cmp_u_f16_e32 vcc, s105, v2.l         ; encoding: [0x69,0x04,0x10,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, vcc_lo, v2
-// W64: v_cmp_u_f16_e32 vcc, vcc_lo, v2         ; encoding: [0x6a,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc, vcc_lo, v2.l
+// W64: v_cmp_u_f16_e32 vcc, vcc_lo, v2.l       ; encoding: [0x6a,0x04,0x10,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, vcc_hi, v2
-// W64: v_cmp_u_f16_e32 vcc, vcc_hi, v2         ; encoding: [0x6b,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc, vcc_hi, v2.l
+// W64: v_cmp_u_f16_e32 vcc, vcc_hi, v2.l       ; encoding: [0x6b,0x04,0x10,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, ttmp15, v2
-// W64: v_cmp_u_f16_e32 vcc, ttmp15, v2         ; encoding: [0x7b,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc, ttmp15, v2.l
+// W64: v_cmp_u_f16_e32 vcc, ttmp15, v2.l       ; encoding: [0x7b,0x04,0x10,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, m0, v2
-// W64: v_cmp_u_f16_e32 vcc, m0, v2             ; encoding: [0x7d,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc, m0, v2.l
+// W64: v_cmp_u_f16_e32 vcc, m0, v2.l           ; encoding: [0x7d,0x04,0x10,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, exec_lo, v2
-// W64: v_cmp_u_f16_e32 vcc, exec_lo, v2        ; encoding: [0x7e,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc, exec_lo, v2.l
+// W64: v_cmp_u_f16_e32 vcc, exec_lo, v2.l      ; encoding: [0x7e,0x04,0x10,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, exec_hi, v2
-// W64: v_cmp_u_f16_e32 vcc, exec_hi, v2        ; encoding: [0x7f,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc, exec_hi, v2.l
+// W64: v_cmp_u_f16_e32 vcc, exec_hi, v2.l      ; encoding: [0x7f,0x04,0x10,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, null, v2
-// W64: v_cmp_u_f16_e32 vcc, null, v2           ; encoding: [0x7c,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc, null, v2.l
+// W64: v_cmp_u_f16_e32 vcc, null, v2.l         ; encoding: [0x7c,0x04,0x10,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, -1, v2
-// W64: v_cmp_u_f16_e32 vcc, -1, v2             ; encoding: [0xc1,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc, -1, v2.l
+// W64: v_cmp_u_f16_e32 vcc, -1, v2.l           ; encoding: [0xc1,0x04,0x10,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, 0.5, v2
-// W64: v_cmp_u_f16_e32 vcc, 0.5, v2            ; encoding: [0xf0,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc, 0.5, v2.l
+// W64: v_cmp_u_f16_e32 vcc, 0.5, v2.l          ; encoding: [0xf0,0x04,0x10,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, src_scc, v2
-// W64: v_cmp_u_f16_e32 vcc, src_scc, v2        ; encoding: [0xfd,0x04,0x10,0x7c]
+v_cmp_u_f16 vcc, src_scc, v2.l
+// W64: v_cmp_u_f16_e32 vcc, src_scc, v2.l      ; encoding: [0xfd,0x04,0x10,0x7c]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, 0xfe0b, v127
-// W64: v_cmp_u_f16_e32 vcc, 0xfe0b, v127       ; encoding: [0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00]
+v_cmp_u_f16 vcc, 0xfe0b, v127.l
+// W64: v_cmp_u_f16_e32 vcc, 0xfe0b, v127.l     ; encoding: [0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1.h, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, v1.h, v2.l      ; encoding: [0x81,0x05,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.h, v2.l
+// W64: v_cmp_u_f16_e32 vcc, v1.h, v2.l         ; encoding: [0x81,0x05,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v127.h, v2.l
+// W32: v_cmp_u_f16_e32 vcc_lo, v127.h, v2.l    ; encoding: [0xff,0x05,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v127.h, v2.l
+// W64: v_cmp_u_f16_e32 vcc, v127.h, v2.l       ; encoding: [0xff,0x05,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, src_scc, v2.h
+// W32: v_cmp_u_f16_e32 vcc_lo, src_scc, v2.h   ; encoding: [0xfd,0x04,0x11,0x7c]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, src_scc, v2.h
+// W64: v_cmp_u_f16_e32 vcc, src_scc, v2.h      ; encoding: [0xfd,0x04,0x11,0x7c]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, 0xfe0b, v127.h
+// W32: v_cmp_u_f16_e32 vcc_lo, 0xfe0b, v127.h  ; encoding: [0xff,0xfe,0x11,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, 0xfe0b, v127.h
+// W64: v_cmp_u_f16_e32 vcc, 0xfe0b, v127.h     ; encoding: [0xff,0xfe,0x11,0x7c,0x0b,0xfe,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_u_f32 vcc_lo, v1, v2
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp16.s
index 15ed1a23e2ec7..03462fbce9ea1 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp16.s
@@ -4,116 +4,212 @@
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
 
-v_cmp_class_f16_dpp vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_class_f16 vcc_lo, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x35,0x30]
+v_cmp_class_f16 vcc_lo, -|v127.l|, v127.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_class_f16 vcc_lo, -|v127.l|, v127.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x35,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_class_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_class_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_class_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_class_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_class_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_class_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_class_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_class_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_class_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_class_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_class_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_class_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_class_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_class_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_class_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_class_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_class_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_class_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_class_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_class_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_class_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_class_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_class_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_class_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_class_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_class_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_class_f16 vcc, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x35,0x30]
+v_cmp_class_f16 vcc, -|v127.l|, v127.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_class_f16 vcc, -|v127.l|, v127.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x35,0x30]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_class_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0xfb,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_class_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0xfb,0x7c,0x81,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, -|v127.h|, v127.h row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_class_f16 vcc_lo, -|v127.h|, v127.h row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfb,0x7c,0xff,0x6f,0x35,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, -|v127.h|, v127.h row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_class_f16 vcc, -|v127.h|, v127.h row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfb,0x7c,0xff,0x6f,0x35,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_class_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
@@ -228,116 +324,212 @@ v_cmp_class_f32 vcc, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound
 // W64: v_cmp_class_f32 vcc, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfd,0x7c,0xff,0x6f,0x35,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_eq_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_eq_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_eq_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xf5,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_eq_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_eq_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_eq_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_eq_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_eq_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_eq_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_eq_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_eq_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_eq_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_eq_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_eq_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_eq_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_eq_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_eq_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_eq_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_eq_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_eq_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_eq_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_eq_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_eq_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_eq_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_eq_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x05,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_eq_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x05,0x7c,0x81,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_eq_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_eq_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_eq_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x05,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_eq_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x05,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
@@ -1092,116 +1284,212 @@ v_cmp_eq_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:
 // W64: v_cmp_eq_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x95,0x7c,0xff,0x6f,0x05,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_ge_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_ge_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_ge_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xf5,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_ge_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_ge_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_ge_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_ge_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_ge_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_ge_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_ge_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_ge_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_ge_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_ge_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_ge_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_ge_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_ge_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_ge_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_ge_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_ge_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_ge_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_ge_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_ge_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_ge_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_ge_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_ge_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0d,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_ge_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0d,0x7c,0x81,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_ge_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_ge_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_ge_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0d,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_ge_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0d,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
@@ -1956,116 +2244,212 @@ v_cmp_ge_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:
 // W64: v_cmp_ge_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x9d,0x7c,0xff,0x6f,0x05,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_gt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_gt_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_gt_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xf5,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_gt_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_gt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_gt_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_gt_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_gt_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_gt_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_gt_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_gt_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_gt_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_gt_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_gt_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_gt_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_gt_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_gt_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_gt_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_gt_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_gt_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_gt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_gt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_gt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_gt_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_gt_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x09,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_gt_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x09,0x7c,0x81,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_gt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_gt_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_gt_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x09,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_gt_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x09,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
@@ -2820,116 +3204,212 @@ v_cmp_gt_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:
 // W64: v_cmp_gt_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x99,0x7c,0xff,0x6f,0x05,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_le_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_le_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_le_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xf5,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_le_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_le_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_le_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_le_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_le_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_le_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_le_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_le_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_le_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_le_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_le_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_le_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_le_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_le_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_le_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_le_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_le_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_le_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_le_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_le_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_le_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_le_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_le_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_le_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_le_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_le_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_le_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_le_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_le_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_le_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x07,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_le_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x07,0x7c,0x81,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_le_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x07,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_le_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x07,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
@@ -3684,136 +4164,232 @@ v_cmp_le_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:
 // W64: v_cmp_le_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x97,0x7c,0xff,0x6f,0x05,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_lg_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_lg_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_lg_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xf5,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_lg_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_lg_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_lg_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_lg_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_lg_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_lg_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_lg_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_lg_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_lg_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_lg_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_lg_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_lg_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_lg_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_lg_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_lg_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_lg_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_lg_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_lg_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_lg_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_lg_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_lg_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_lg_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_lg_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_lg_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_lg_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_lg_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_lg_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_lg_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_lg_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_lg_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_lg_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x2a,0x7c,0x01,0xe4,0x00,0xff]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f32 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_lg_f32 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f32 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_lg_f32 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_lg_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f32 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_lg_f32 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_lg_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_lg_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0b,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_lg_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0b,0x7c,0x81,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_lg_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0b,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_lg_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0b,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_lg_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_lg_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x2a,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 row_mirror
+// W32: v_cmp_lg_f32 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: v_cmp_lg_f32 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 row_shl:1
+// W32: v_cmp_lg_f32 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lg_f32 vcc_lo, v1, v2 row_shl:15
@@ -5508,116 +6084,212 @@ v_cmp_ne_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:
 // W64: v_cmp_ne_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x9b,0x7c,0xff,0x6f,0x05,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_neq_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_neq_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_neq_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xf5,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_neq_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_neq_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_neq_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_neq_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_neq_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_neq_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_neq_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_neq_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_neq_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_neq_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_neq_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_neq_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_neq_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_neq_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_neq_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_neq_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_neq_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_neq_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_neq_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_neq_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_neq_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_neq_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_neq_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_neq_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1b,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_neq_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1b,0x7c,0x81,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_neq_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1b,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_neq_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1b,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
@@ -5732,116 +6404,212 @@ v_cmp_neq_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 boun
 // W64: v_cmp_neq_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x3b,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_nge_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_nge_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_nge_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xf5,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_nge_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_nge_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_nge_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_nge_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_nge_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_nge_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_nge_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_nge_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_nge_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_nge_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_nge_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_nge_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_nge_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_nge_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_nge_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_nge_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_nge_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_nge_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_nge_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_nge_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_nge_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_nge_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_nge_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_nge_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_nge_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_nge_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_nge_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_nge_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nge_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x13,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nge_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x13,0x7c,0x81,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_nge_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x13,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_nge_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x13,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
@@ -5956,116 +6724,212 @@ v_cmp_nge_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 boun
 // W64: v_cmp_nge_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x33,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_ngt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_ngt_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_ngt_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xf5,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_ngt_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_ngt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_ngt_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_ngt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_ngt_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_ngt_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_ngt_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_ngt_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_ngt_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_ngt_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_ngt_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_ngt_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_ngt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_ngt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_ngt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_ngt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_ngt_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_ngt_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_ngt_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x17,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_ngt_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x17,0x7c,0x81,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_ngt_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x17,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_ngt_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x17,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
@@ -6180,116 +7044,212 @@ v_cmp_ngt_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 boun
 // W64: v_cmp_ngt_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x37,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_nle_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_nle_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_nle_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xf5,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_nle_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_nle_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_nle_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_nle_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_nle_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_nle_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_nle_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_nle_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_nle_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_nle_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_nle_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_nle_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_nle_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_nle_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_nle_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_nle_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_nle_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_nle_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nle_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x19,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nle_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x19,0x7c,0x81,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_nle_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_nle_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_nle_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x19,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_nle_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x19,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
@@ -6404,116 +7364,212 @@ v_cmp_nle_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 boun
 // W64: v_cmp_nle_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x39,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_nlg_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_nlg_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_nlg_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xf5,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_nlg_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_nlg_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_nlg_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_nlg_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_nlg_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_nlg_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_nlg_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_nlg_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_nlg_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_nlg_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_nlg_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_nlg_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_nlg_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_nlg_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_nlg_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_nlg_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_nlg_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nlg_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x15,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nlg_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x15,0x7c,0x81,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_nlg_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_nlg_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_nlg_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x15,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_nlg_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x15,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
@@ -6628,116 +7684,212 @@ v_cmp_nlg_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 boun
 // W64: v_cmp_nlg_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x35,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_nlt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_nlt_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_nlt_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xf5,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_nlt_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_nlt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_nlt_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_nlt_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_nlt_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_nlt_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_nlt_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_nlt_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_nlt_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_nlt_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_nlt_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_nlt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_nlt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_nlt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_nlt_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_nlt_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_nlt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nlt_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1d,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nlt_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x1d,0x7c,0x81,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_nlt_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1d,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_nlt_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x1d,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
@@ -6852,116 +8004,212 @@ v_cmp_nlt_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 boun
 // W64: v_cmp_nlt_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x3d,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_o_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_o_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_o_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xf5,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_o_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_o_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_o_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_o_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_o_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_o_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_o_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_o_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_o_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_o_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_o_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_o_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_o_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_o_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_o_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_o_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_o_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_o_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_o_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_o_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_o_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_o_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_o_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_o_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_o_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_o_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_o_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_o_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0f,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_o_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0f,0x7c,0x81,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_o_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0f,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_o_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x0f,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_o_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
@@ -7076,116 +8324,212 @@ v_cmp_o_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_
 // W64: v_cmp_o_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x2f,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 row_mirror
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_mirror
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 row_half_mirror
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_half_mirror
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 row_shl:1
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shl:1
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 row_shl:15
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shl:15
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 row_shr:1
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shr:1
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 row_shr:15
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shr:15
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 row_ror:1
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_ror:1
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 row_ror:15
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_ror:15
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x60,0x09,0x13]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W32: v_cmp_u_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_u_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W32: v_cmp_u_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xf5,0x30]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_u_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff]
+v_cmp_u_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3]
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_mirror
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_half_mirror
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_shl:1
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_shl:15
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_shr:1
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_shr:15
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_ror:1
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_ror:15
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
-// W64: v_cmp_u_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff]
+v_cmp_u_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x60,0x09,0x13]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 row_mirror
-// W64: v_cmp_u_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff]
+v_cmp_u_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// W64: v_cmp_u_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 row_half_mirror
-// W64: v_cmp_u_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 row_shl:1
-// W64: v_cmp_u_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 row_shl:15
-// W64: v_cmp_u_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 row_shr:1
-// W64: v_cmp_u_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 row_shr:15
-// W64: v_cmp_u_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 row_ror:1
-// W64: v_cmp_u_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 row_ror:15
-// W64: v_cmp_u_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cmp_u_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cmp_u_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cmp_u_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x60,0x09,0x13]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// W64: v_cmp_u_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xf5,0x30]
+v_cmp_u_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_u_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x11,0x7c,0x81,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_u_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x11,0x7c,0x81,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: v_cmp_u_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x11,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: v_cmp_u_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x11,0x7c,0xff,0x6f,0xf5,0x30]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_u_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp8.s
index 4942cf25e0458..bf52378890fb5 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp8.s
@@ -4,28 +4,44 @@
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
 
-v_cmp_class_f16_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_class_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_class_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_class_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_class_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_class_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_class_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_class_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_class_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_class_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_class_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_class_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_class_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_class_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_class_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_class_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0xfb,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_class_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0xfb,0x7c,0x81,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_class_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfb,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_class_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfb,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_class_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
@@ -52,28 +68,44 @@ v_cmp_class_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W64: v_cmp_class_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfd,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_eq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_eq_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_eq_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_eq_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_eq_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_eq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_eq_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_eq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_eq_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_eq_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x05,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_eq_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x05,0x7c,0x81,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_eq_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_eq_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_eq_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x05,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_eq_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x05,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
@@ -228,28 +260,44 @@ v_cmp_eq_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W64: v_cmp_eq_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x95,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_ge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ge_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_ge_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_ge_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_ge_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_ge_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_ge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_ge_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ge_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0d,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_ge_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0d,0x7c,0x81,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ge_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_ge_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_ge_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_ge_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0d,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_ge_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0d,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
@@ -404,28 +452,44 @@ v_cmp_ge_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W64: v_cmp_ge_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x9d,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_gt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_gt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_gt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_gt_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_gt_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_gt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_gt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_gt_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_gt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_gt_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x09,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_gt_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x09,0x7c,0x81,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_gt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_gt_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_gt_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x09,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_gt_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x09,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
@@ -580,28 +644,44 @@ v_cmp_gt_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W64: v_cmp_gt_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x99,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_le_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_le_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_le_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_le_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_le_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_le_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_le_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_le_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_le_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_le_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_le_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_le_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_le_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x07,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_le_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x07,0x7c,0x81,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_le_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_le_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_le_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_le_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x07,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_le_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x07,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
@@ -756,28 +836,44 @@ v_cmp_le_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W64: v_cmp_le_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x97,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_lg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_lg_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_lg_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_lg_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_lg_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_lg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_lg_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_lg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_lg_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_lg_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_lg_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_lg_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_lg_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0b,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_lg_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0b,0x7c,0x81,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_lg_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0b,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_lg_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0b,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lg_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
@@ -1124,28 +1220,44 @@ v_cmp_ne_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W64: v_cmp_ne_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x9b,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_neq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_neq_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_neq_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_neq_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_neq_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_neq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_neq_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_neq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_neq_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_neq_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_neq_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_neq_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1b,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_neq_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1b,0x7c,0x81,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_neq_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1b,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_neq_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1b,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
@@ -1172,28 +1284,44 @@ v_cmp_neq_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W64: v_cmp_neq_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x3b,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_nge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nge_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_nge_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_nge_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_nge_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_nge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nge_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_nge_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_nge_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_nge_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nge_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x13,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nge_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x13,0x7c,0x81,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_nge_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x13,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_nge_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x13,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
@@ -1220,28 +1348,44 @@ v_cmp_nge_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W64: v_cmp_nge_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x33,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_ngt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_ngt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_ngt_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_ngt_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ngt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_ngt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_ngt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_ngt_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ngt_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x17,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_ngt_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x17,0x7c,0x81,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ngt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_ngt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_ngt_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_ngt_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x17,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_ngt_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x17,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
@@ -1268,28 +1412,44 @@ v_cmp_ngt_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W64: v_cmp_ngt_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x37,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_nle_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nle_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_nle_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_nle_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_nle_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nle_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nle_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_nle_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nle_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_nle_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nle_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x19,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nle_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x19,0x7c,0x81,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_nle_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_nle_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_nle_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x19,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_nle_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x19,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
@@ -1316,28 +1476,44 @@ v_cmp_nle_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W64: v_cmp_nle_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x39,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_nlg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_nlg_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_nlg_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_nlg_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nlg_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_nlg_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_nlg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nlg_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x15,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nlg_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x15,0x7c,0x81,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_nlg_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_nlg_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_nlg_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x15,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_nlg_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x15,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
@@ -1364,28 +1540,44 @@ v_cmp_nlg_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W64: v_cmp_nlg_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x35,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_nlt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_nlt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_nlt_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_nlt_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nlt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_nlt_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_nlt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_nlt_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nlt_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1d,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nlt_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x1d,0x7c,0x81,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_nlt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_nlt_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_nlt_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1d,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_nlt_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x1d,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
@@ -1412,28 +1604,44 @@ v_cmp_nlt_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W64: v_cmp_nlt_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x3d,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_o_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_o_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_o_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_o_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_o_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_o_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_o_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_o_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_o_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_o_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_o_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_o_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_o_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_o_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_o_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_o_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0f,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_o_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0f,0x7c,0x81,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_o_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0f,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_o_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x0f,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_o_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
@@ -1460,28 +1668,44 @@ v_cmp_o_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // W64: v_cmp_o_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x2f,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cmp_u_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_u_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_u_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W32: v_cmp_u_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_u_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W32: v_cmp_u_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00]
 // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_u_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_u_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_u_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cmp_u_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
+v_cmp_u_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
+// W64: v_cmp_u_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// W64: v_cmp_u_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00]
+v_cmp_u_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_u_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x11,0x7c,0x81,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_u_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x11,0x7c,0x81,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: v_cmp_u_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x11,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: v_cmp_u_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x11,0x7c,0xff,0x00,0x00,0x00]
 // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_u_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_err.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_err.s
index ef38afe7385a2..2e785a07f8133 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_err.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_err.s
@@ -1,128 +1,251 @@
 // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12 --implicit-check-not=error %s
 
-v_cmp_class_f16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_class_f16_e32 vcc, v1.h, v255.h
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_class_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_class_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v1.l, v255.l
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v127.h, v255.h
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v127.l, v255.l
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v128.h, v2.h
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v128.l, v2.l
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, vcc_hi, v255.h
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, vcc_hi, v255.l
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, vcc_lo, v255.h
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_class_f16_e32 vcc, vcc_lo, v255.l
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_class_f16_e32 vcc_lo, v127.h, v255.h
 // GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_class_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_class_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_class_f16_e32 vcc_lo, v127.l, v255.l
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_class_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_class_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_class_f16_e32 vcc_lo, v128.h, v2.h
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_class_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:40: error: invalid operand for instruction
+v_cmp_class_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:40: error: invalid operand for instruction
+v_cmp_class_f16_e32 vcc_lo, v128.l, v2.l
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_class_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
+v_cmp_class_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
+v_cmp_class_f16_e32 vcc_lo, vcc_hi, v255.h
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_class_f16_e32 vcc_lo, vcc_hi, v255.l
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_class_f16_e32 vcc_lo, vcc_lo, v255.h
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_class_f16_e32 vcc_lo, vcc_lo, v255.l
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+v_cmp_eq_f16_e32 vcc, v1.h, v255.h
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+v_cmp_eq_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_eq_f16_e32 vcc, v1.l, v255.l
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_eq_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v127.h, v255.h
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v127.l, v255.l
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v128.h, v2.h
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v128.l, v2.l
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, vcc_hi, v255.h
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, vcc_hi, v255.l
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, vcc_lo, v255.h
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16_e32 vcc, vcc_lo, v255.l
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_eq_f16_e32 vcc_lo, v1.h, v255.h
 // GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_eq_f16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16_e32 vcc_lo, v1.l, v255.l
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_eq_f16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_eq_f16_e32 vcc_lo, v127.h, v255.h
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+v_cmp_eq_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+v_cmp_eq_f16_e32 vcc_lo, v127.l, v255.l
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_eq_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_eq_f16_e32 vcc_lo, v128.h, v2.h
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc_lo, v128.l, v2.l
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc_lo, vcc_hi, v255.h
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc_lo, vcc_hi, v255.l
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16_e32 vcc_lo, vcc_lo, v255.h
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16_e32 vcc_lo, vcc_lo, v255.l
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
 v_cmp_eq_i16_e32 vcc, v1.h, v255.h
 // GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
@@ -388,71 +511,137 @@ v_cmp_eq_u16_e32 vcc_lo, vcc_lo, v255.h
 v_cmp_eq_u16_e32 vcc_lo, vcc_lo, v255.l
 // GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ge_f16_e32 vcc, v1.h, v255.h
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+v_cmp_ge_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+v_cmp_ge_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ge_f16_e32 vcc, v1.l, v255.l
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_ge_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_ge_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc, v127.h, v255.h
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc, v127.l, v255.l
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc, v128.h, v2.h
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc, v128.l, v2.l
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc, vcc_hi, v255.h
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc, vcc_hi, v255.l
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc, vcc_lo, v255.h
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc, vcc_lo, v255.l
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ge_f16_e32 vcc_lo, v1.h, v255.h
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ge_f16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_ge_f16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
 // GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ge_f16_e32 vcc_lo, v1.l, v255.l
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ge_f16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ge_f16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_ge_f16_e32 vcc_lo, v127.h, v255.h
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_ge_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ge_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+v_cmp_ge_f16_e32 vcc_lo, v127.l, v255.l
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+v_cmp_ge_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ge_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_ge_f16_e32 vcc_lo, v128.h, v2.h
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_ge_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc_lo, v128.l, v2.l
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc_lo, vcc_hi, v255.h
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc_lo, vcc_hi, v255.l
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ge_f16_e32 vcc_lo, vcc_lo, v255.h
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ge_f16_e32 vcc_lo, vcc_lo, v255.l
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
 v_cmp_ge_i16_e32 vcc, v1.h, v255.h
 // GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
@@ -718,115 +907,181 @@ v_cmp_ge_u16_e32 vcc_lo, vcc_lo, v255.h
 v_cmp_ge_u16_e32 vcc_lo, vcc_lo, v255.l
 // GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_gt_f16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_gt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_gt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_gt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_gt_f16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_gt_f16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_gt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_gt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_gt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_gt_f16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_gt_i16_e32 vcc, v1.h, v255.h
+v_cmp_gt_f16_e32 vcc, v1.h, v255.h
 // GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_gt_i16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_gt_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_gt_i16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+v_cmp_gt_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
 // GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_gt_i16_e32 vcc, v1.l, v255.l
+v_cmp_gt_f16_e32 vcc, v1.l, v255.l
 // GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_gt_i16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_gt_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_gt_i16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+v_cmp_gt_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_gt_i16_e32 vcc, v127.h, v255.h
+v_cmp_gt_f16_e32 vcc, v127.h, v255.h
 // GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_gt_i16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_gt_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_gt_i16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+v_cmp_gt_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
 // GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_gt_i16_e32 vcc, v127.l, v255.l
+v_cmp_gt_f16_e32 vcc, v127.l, v255.l
 // GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_gt_i16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_gt_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_gt_i16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+v_cmp_gt_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
 // GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_gt_i16_e32 vcc, v128.h, v2.h
+v_cmp_gt_f16_e32 vcc, v128.h, v2.h
 // GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
 
-v_cmp_gt_i16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_gt_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
 
-v_cmp_gt_i16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+v_cmp_gt_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, v128.l, v2.l
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, vcc_hi, v255.h
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, vcc_hi, v255.l
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, vcc_lo, v255.h
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, vcc_lo, v255.l
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v1.h, v255.h
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v1.l, v255.l
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v127.h, v255.h
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v127.l, v255.l
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v128.h, v2.h
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v128.l, v2.l
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, vcc_hi, v255.h
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, vcc_hi, v255.l
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, vcc_lo, v255.h
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, vcc_lo, v255.l
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc, v1.h, v255.h
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc, v1.l, v255.l
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc, v127.h, v255.h
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc, v127.l, v255.l
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc, v128.h, v2.h
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
 // GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
 
 v_cmp_gt_i16_e32 vcc, v128.l, v2.l
@@ -1048,71 +1303,137 @@ v_cmp_gt_u16_e32 vcc_lo, vcc_lo, v255.h
 v_cmp_gt_u16_e32 vcc_lo, vcc_lo, v255.l
 // GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_le_f16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_le_f16_e32 vcc, v1.h, v255.h
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_le_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+v_cmp_le_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_le_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+v_cmp_le_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_le_f16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_le_f16_e32 vcc, v1.l, v255.l
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_le_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_le_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_le_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_le_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, v127.h, v255.h
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, v127.l, v255.l
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, v128.h, v2.h
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, v128.l, v2.l
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, vcc_hi, v255.h
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, vcc_hi, v255.l
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, vcc_lo, v255.h
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_le_f16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_le_f16_e32 vcc, vcc_lo, v255.l
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_le_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_le_f16_e32 vcc_lo, v1.h, v255.h
 // GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_le_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_le_f16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_le_f16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_le_f16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_le_f16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_le_f16_e32 vcc_lo, v1.l, v255.l
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_le_f16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_le_f16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_le_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_le_f16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_le_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_le_f16_e32 vcc_lo, v127.h, v255.h
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_le_f16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_le_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_le_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+v_cmp_le_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_le_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+v_cmp_le_f16_e32 vcc_lo, v127.l, v255.l
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_le_f16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_le_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_le_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_le_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_le_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_le_f16_e32 vcc_lo, v128.h, v2.h
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v128.l, v2.l
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, vcc_hi, v255.h
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, vcc_hi, v255.l
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_le_f16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_le_f16_e32 vcc_lo, vcc_lo, v255.h
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_le_f16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_le_f16_e32 vcc_lo, vcc_lo, v255.l
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
 v_cmp_le_i16_e32 vcc, v1.h, v255.h
 // GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
@@ -1378,71 +1699,137 @@ v_cmp_le_u16_e32 vcc_lo, vcc_lo, v255.h
 v_cmp_le_u16_e32 vcc_lo, vcc_lo, v255.l
 // GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16_e32 vcc, v1.h, v255.h
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+v_cmp_lg_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+v_cmp_lg_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16_e32 vcc, v1.l, v255.l
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_lg_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_lg_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v127.h, v255.h
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v127.l, v255.l
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v128.h, v2.h
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v128.l, v2.l
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, vcc_hi, v255.h
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, vcc_hi, v255.l
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, vcc_lo, v255.h
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16_e32 vcc, vcc_lo, v255.l
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_lg_f16_e32 vcc_lo, v1.h, v255.h
 // GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_lg_f16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16_e32 vcc_lo, v1.l, v255.l
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_lg_f16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_lg_f16_e32 vcc_lo, v127.h, v255.h
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+v_cmp_lg_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+v_cmp_lg_f16_e32 vcc_lo, v127.l, v255.l
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_lg_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_lg_f16_e32 vcc_lo, v128.h, v2.h
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc_lo, v128.l, v2.l
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc_lo, vcc_hi, v255.h
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc_lo, vcc_hi, v255.l
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16_e32 vcc_lo, vcc_lo, v255.h
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16_e32 vcc_lo, vcc_lo, v255.l
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
 v_cmp_lt_f16_e32 vcc, v1.h, v255.h
 // GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
@@ -2104,530 +2491,1058 @@ v_cmp_ne_u16_e32 vcc_lo, vcc_lo, v255.h
 v_cmp_ne_u16_e32 vcc_lo, vcc_lo, v255.l
 // GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc, v1.h, v255.h
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc, v1.l, v255.l
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc, v127.h, v255.h
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc, v127.l, v255.l
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc, v128.h, v2.h
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc, v128.l, v2.l
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc, vcc_hi, v255.h
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc, vcc_hi, v255.l
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc, vcc_lo, v255.h
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc, vcc_lo, v255.l
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc_lo, v1.h, v255.h
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_neq_f16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_neq_f16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
 // GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc_lo, v1.l, v255.l
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_neq_f16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_neq_f16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_neq_f16_e32 vcc_lo, v127.h, v255.h
 // GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_neq_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc_lo, v127.l, v255.l
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_neq_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_neq_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_neq_f16_e32 vcc_lo, v128.h, v2.h
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_neq_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_neq_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_neq_f16_e32 vcc_lo, v128.l, v2.l
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_neq_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_neq_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_neq_f16_e32 vcc_lo, vcc_hi, v255.h
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_neq_f16_e32 vcc_lo, vcc_hi, v255.l
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_neq_f16_e32 vcc_lo, vcc_lo, v255.h
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_neq_f16_e32 vcc_lo, vcc_lo, v255.l
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc, v1.h, v255.h
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc, v1.l, v255.l
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc, v127.h, v255.h
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16_e32 vcc, v127.l, v255.l
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nge_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nge_f16_e32 vcc, v128.h, v2.h
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
+v_cmp_nge_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
+v_cmp_nge_f16_e32 vcc, v128.l, v2.l
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nge_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nge_f16_e32 vcc, vcc_hi, v255.h
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16_e32 vcc, vcc_hi, v255.l
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16_e32 vcc, vcc_lo, v255.h
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16_e32 vcc, vcc_lo, v255.l
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_nge_f16_e32 vcc_lo, v1.h, v255.h
 // GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_nge_f16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_nge_f16_e32 vcc_lo, v1.l, v255.l
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v127.h, v255.h
 // GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_nge_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_nge_f16_e32 vcc_lo, v127.l, v255.l
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_nge_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v128.h, v2.h
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v128.l, v2.l
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, vcc_hi, v255.h
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, vcc_hi, v255.l
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, vcc_lo, v255.h
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, vcc_lo, v255.l
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v1.h, v255.h
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v1.l, v255.l
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ngt_f16_e32 vcc, v127.h, v255.h
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ngt_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v127.l, v255.l
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ngt_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_ngt_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_ngt_f16_e32 vcc, v128.h, v2.h
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ngt_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
+v_cmp_ngt_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
+v_cmp_ngt_f16_e32 vcc, v128.l, v2.l
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ngt_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_ngt_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_ngt_f16_e32 vcc, vcc_hi, v255.h
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ngt_f16_e32 vcc, vcc_hi, v255.l
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ngt_f16_e32 vcc, vcc_lo, v255.h
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ngt_f16_e32 vcc, vcc_lo, v255.l
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ngt_f16_e32 vcc_lo, v1.h, v255.h
 // GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_ngt_f16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ngt_f16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v1.l, v255.l
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ngt_f16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v127.h, v255.h
 // GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_ngt_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ngt_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_ngt_f16_e32 vcc_lo, v127.l, v255.l
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_ngt_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v128.h, v2.h
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v128.l, v2.l
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, vcc_hi, v255.h
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, vcc_hi, v255.l
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, vcc_lo, v255.h
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, vcc_lo, v255.l
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v1.h, v255.h
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v1.l, v255.l
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nle_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nle_f16_e32 vcc, v127.h, v255.h
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nle_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v127.l, v255.l
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nle_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nle_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nle_f16_e32 vcc, v128.h, v2.h
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
+v_cmp_nle_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
+v_cmp_nle_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nle_f16_e32 vcc, v128.l, v2.l
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nle_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nle_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nle_f16_e32 vcc, vcc_hi, v255.h
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nle_f16_e32 vcc, vcc_hi, v255.l
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nle_f16_e32 vcc, vcc_lo, v255.h
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_nle_f16_e32 vcc, vcc_lo, v255.l
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v1.h, v255.h
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_nle_f16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
 // GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nle_f16_e32 vcc_lo, v1.l, v255.l
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_nle_f16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v127.h, v255.h
 // GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_nle_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nle_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_nle_f16_e32 vcc_lo, v127.l, v255.l
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_nle_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v128.h, v2.h
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v128.l, v2.l
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, vcc_hi, v255.h
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nle_f16_e32 vcc_lo, vcc_hi, v255.l
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nle_f16_e32 vcc_lo, vcc_lo, v255.h
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nle_f16_e32 vcc_lo, vcc_lo, v255.l
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nlg_f16_e32 vcc, v1.h, v255.h
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nlg_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlg_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
+v_cmp_nlg_f16_e32 vcc, v1.l, v255.l
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
+v_cmp_nlg_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlg_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nlg_f16_e32 vcc, v127.h, v255.h
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nlg_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlg_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlg_f16_e32 vcc, v127.l, v255.l
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlg_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_nlg_f16_e32 vcc, v128.h, v2.h
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v128.l, v2.l
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, vcc_hi, v255.h
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, vcc_hi, v255.l
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, vcc_lo, v255.h
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, vcc_lo, v255.l
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc_lo, v1.h, v255.h
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_nlg_f16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
 // GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlg_f16_e32 vcc_lo, v1.l, v255.l
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_nlg_f16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc_lo, v127.h, v255.h
 // GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_nlg_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlg_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_nlg_f16_e32 vcc_lo, v127.l, v255.l
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+v_cmp_nlg_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlg_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlg_f16_e32 vcc_lo, v128.h, v2.h
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlg_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nlg_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nlg_f16_e32 vcc_lo, v128.l, v2.l
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlg_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
+v_cmp_nlg_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
+v_cmp_nlg_f16_e32 vcc_lo, vcc_hi, v255.h
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlg_f16_e32 vcc_lo, vcc_hi, v255.l
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nlg_f16_e32 vcc_lo, vcc_lo, v255.h
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nlg_f16_e32 vcc_lo, vcc_lo, v255.l
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlt_f16_e32 vcc, v1.h, v255.h
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_nlt_f16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlt_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlt_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+v_cmp_nlt_f16_e32 vcc, v1.l, v255.l
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+v_cmp_nlt_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, v127.h, v255.h
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, v127.l, v255.l
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, v128.h, v2.h
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, v128.l, v2.l
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlt_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, vcc_hi, v255.h
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, vcc_hi, v255.l
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, vcc_lo, v255.h
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_nlt_f16_e32 vcc, vcc_lo, v255.l
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc_lo, v1.h, v255.h
 // GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_nlt_f16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlt_f16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+v_cmp_nlt_f16_e32 vcc_lo, v1.l, v255.l
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+v_cmp_nlt_f16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlt_f16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlt_f16_e32 vcc_lo, v127.h, v255.h
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlt_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_nlt_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_nlt_f16_e32 vcc_lo, v127.l, v255.l
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlt_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nlt_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_nlt_f16_e32 vcc_lo, v128.h, v2.h
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlt_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_nlt_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_nlt_f16_e32 vcc_lo, v128.l, v2.l
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc_lo, vcc_hi, v255.h
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc_lo, vcc_hi, v255.l
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc_lo, vcc_lo, v255.h
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc_lo, vcc_lo, v255.l
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc, v1.h, v255.h
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc, v1.l, v255.l
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16_e32 vcc, v127.h, v255.h
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_o_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc, v127.l, v255.l
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc, v128.h, v2.h
+// GFX12: :[[@LINE-1]]:22: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:22: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:22: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc, v128.l, v2.l
+// GFX12: :[[@LINE-1]]:22: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:22: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:22: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc, vcc_hi, v255.h
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc, vcc_hi, v255.l
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc, vcc_lo, v255.h
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc, vcc_lo, v255.l
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc_lo, v1.h, v255.h
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
 // GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_o_f16_e32 vcc_lo, v1.l, v255.l
 // GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc_lo, v127.h, v255.h
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_o_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_o_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
 // GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16_e32 vcc_lo, v127.l, v255.l
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc_lo, v128.h, v2.h
+// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc_lo, v128.l, v2.l
+// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc_lo, vcc_hi, v255.h
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc_lo, vcc_hi, v255.l
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc_lo, vcc_lo, v255.h
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc_lo, vcc_lo, v255.l
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v1.h, v255.h
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_u_f16_e32 vcc, v1.l, v255.l
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v127.h, v255.h
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v127.l, v255.l
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v128.h, v2.h
+// GFX12: :[[@LINE-1]]:22: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:22: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:22: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v128.l, v2.l
+// GFX12: :[[@LINE-1]]:22: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:22: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:22: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, vcc_hi, v255.h
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, vcc_hi, v255.l
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, vcc_lo, v255.h
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, vcc_lo, v255.l
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc_lo, v1.h, v255.h
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
 // GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_u_f16_e32 vcc_lo, v1.l, v255.l
 // GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_u_f16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_u_f16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_u_f16_e32 vcc_lo, v127.h, v255.h
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc_lo, v127.l, v255.l
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_u_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc_lo, v128.h, v2.h
+// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_u_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc_lo, v128.l, v2.l
+// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_u_f16_e32 vcc_lo, vcc_hi, v255.h
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_cmp_u_f16_e32 vcc_lo, vcc_hi, v255.l
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc_lo, vcc_lo, v255.h
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc_lo, vcc_lo, v255.l
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_promote.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_promote.s
index 36aec4e0617c4..2005f4823b65b 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_promote.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_promote.s
@@ -4,169 +4,333 @@
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 %s 2>&1 > /dev/null | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 %s 2>&1 > /dev/null | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
 
-v_cmp_class_f16 vcc, v1, v255
-// W64: v_cmp_class_f16_e64 vcc, v1, v255       ; encoding: [0x6a,0x00,0x7d,0xd4,0x01,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_class_f16 vcc, v1.h, v255.h
+// W64: v_cmp_class_f16_e64 vcc, v1.h, v255.h   ; encoding: [0x6a,0x18,0x7d,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_class_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_class_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_class_f16 vcc, v1.l, v255.l
+// W64: v_cmp_class_f16_e64 vcc, v1.l, v255.l   ; encoding: [0x6a,0x00,0x7d,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_class_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_class_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_class_f16 vcc, v127.h, v255.h
+// W64: v_cmp_class_f16_e64 vcc, v127.h, v255.h ; encoding: [0x6a,0x18,0x7d,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_class_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_class_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_class_f16 vcc, v127.l, v255.l
+// W64: v_cmp_class_f16_e64 vcc, v127.l, v255.l ; encoding: [0x6a,0x00,0x7d,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_class_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_class_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_class_f16 vcc, v128.h, v2.h
+// W64: v_cmp_class_f16_e64 vcc, v128.h, v2.h   ; encoding: [0x6a,0x18,0x7d,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_class_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_class_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_class_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_class_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_class_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_class_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_class_f16 vcc, v128.l, v2.l
+// W64: v_cmp_class_f16_e64 vcc, v128.l, v2.l   ; encoding: [0x6a,0x00,0x7d,0xd4,0x80,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_class_f16 vcc, v127, v255
-// W64: v_cmp_class_f16_e64 vcc, v127, v255     ; encoding: [0x6a,0x00,0x7d,0xd4,0x7f,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_class_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_class_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_class_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_class_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_class_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_class_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_class_f16 vcc, vcc_hi, v255.h
+// W64: v_cmp_class_f16_e64 vcc, vcc_hi, v255.h ; encoding: [0x6a,0x10,0x7d,0xd4,0x6b,0xfe,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_class_f16 vcc, v128, v2
-// W64: v_cmp_class_f16_e64 vcc, v128, v2       ; encoding: [0x6a,0x00,0x7d,0xd4,0x80,0x05,0x02,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_class_f16 vcc, vcc_hi, v255.l
+// W64: v_cmp_class_f16_e64 vcc, vcc_hi, v255.l ; encoding: [0x6a,0x00,0x7d,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_class_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_class_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_class_f16 vcc, vcc_lo, v255.h
+// W64: v_cmp_class_f16_e64 vcc, vcc_lo, v255.h ; encoding: [0x6a,0x10,0x7d,0xd4,0x6a,0xfe,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_class_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_class_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_class_f16 vcc, vcc_lo, v255.l
+// W64: v_cmp_class_f16_e64 vcc, vcc_lo, v255.l ; encoding: [0x6a,0x00,0x7d,0xd4,0x6a,0xfe,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_class_f16 vcc, vcc_hi, v255
-// W64: v_cmp_class_f16_e64 vcc, vcc_hi, v255   ; encoding: [0x6a,0x00,0x7d,0xd4,0x6b,0xfe,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_class_f16 vcc_lo, v127.h, v255.h
+// W32: v_cmp_class_f16_e64 vcc_lo, v127.h, v255.h ; encoding: [0x6a,0x18,0x7d,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_class_f16 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp vcc_lo, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_class_f16 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp vcc_lo, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_class_f16 vcc_lo, v127.l, v255.l
+// W32: v_cmp_class_f16_e64 vcc_lo, v127.l, v255.l ; encoding: [0x6a,0x00,0x7d,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_class_f16 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_class_f16 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_class_f16 vcc_lo, v128.h, v2.h
+// W32: v_cmp_class_f16_e64 vcc_lo, v128.h, v2.h ; encoding: [0x6a,0x18,0x7d,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_class_f16 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp vcc_lo, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_class_f16 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp vcc_lo, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_class_f16 vcc, vcc_lo, v255
-// W64: v_cmp_class_f16_e64 vcc, vcc_lo, v255   ; encoding: [0x6a,0x00,0x7d,0xd4,0x6a,0xfe,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_class_f16 vcc_lo, v128.l, v2.l
+// W32: v_cmp_class_f16_e64 vcc_lo, v128.l, v2.l ; encoding: [0x6a,0x00,0x7d,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_class_f16 vcc_lo, v127, v255
-// W32: v_cmp_class_f16_e64 vcc_lo, v127, v255  ; encoding: [0x6a,0x00,0x7d,0xd4,0x7f,0xff,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_class_f16 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_class_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_class_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_class_f16 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_class_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_class_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_class_f16 vcc_lo, vcc_hi, v255.h
+// W32: v_cmp_class_f16_e64 vcc_lo, vcc_hi, v255.h ; encoding: [0x6a,0x10,0x7d,0xd4,0x6b,0xfe,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_class_f16 vcc_lo, v128, v2
-// W32: v_cmp_class_f16_e64 vcc_lo, v128, v2    ; encoding: [0x6a,0x00,0x7d,0xd4,0x80,0x05,0x02,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_class_f16 vcc_lo, vcc_hi, v255.l
+// W32: v_cmp_class_f16_e64 vcc_lo, vcc_hi, v255.l ; encoding: [0x6a,0x00,0x7d,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_class_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_class_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_class_f16 vcc_lo, vcc_lo, v255.h
+// W32: v_cmp_class_f16_e64 vcc_lo, vcc_lo, v255.h ; encoding: [0x6a,0x10,0x7d,0xd4,0x6a,0xfe,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_class_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_class_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_class_f16 vcc_lo, vcc_lo, v255.l
+// W32: v_cmp_class_f16_e64 vcc_lo, vcc_lo, v255.l ; encoding: [0x6a,0x00,0x7d,0xd4,0x6a,0xfe,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
-v_cmp_class_f16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_class_f16_e64 vcc_lo, vcc_hi, v255 ; encoding: [0x6a,0x00,0x7d,0xd4,0x6b,0xfe,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16 vcc, v1.h, v255.h
+// W64: v_cmp_eq_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x02,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_f16 vcc, v1.l, v255.l
+// W64: v_cmp_eq_f16_e64 vcc, v1.l, v255.l      ; encoding: [0x6a,0x00,0x02,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_f16 vcc, v127.h, v255.h
+// W64: v_cmp_eq_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x02,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_f16 vcc, v127.l, v255.l
+// W64: v_cmp_eq_f16_e64 vcc, v127.l, v255.l    ; encoding: [0x6a,0x00,0x02,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_class_f16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_class_f16_e64 vcc_lo, vcc_lo, v255 ; encoding: [0x6a,0x00,0x7d,0xd4,0x6a,0xfe,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16 vcc, v128.h, v2.h
+// W64: v_cmp_eq_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x02,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_eq_f16 vcc, v1, v255
-// W64: v_cmp_eq_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x02,0xd4,0x01,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x02,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_eq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_eq_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x02,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_eq_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_eq_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_eq_f16 vcc, v128.l, v2.l
+// W64: v_cmp_eq_f16_e64 vcc, v128.l, v2.l      ; encoding: [0x6a,0x00,0x02,0xd4,0x80,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_eq_f16 vcc, v127, v255
-// W64: v_cmp_eq_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x02,0xd4,0x7f,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_eq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_eq_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_eq_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_eq_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_eq_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_eq_f16 vcc, vcc_hi, v255.h
+// W64: v_cmp_eq_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x02,0xd4,0x6b,0xfe,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_eq_f16 vcc, v128, v2
-// W64: v_cmp_eq_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x02,0xd4,0x80,0x05,0x02,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16 vcc, vcc_hi, v255.l
+// W64: v_cmp_eq_f16_e64 vcc, vcc_hi, v255.l    ; encoding: [0x6a,0x00,0x02,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_eq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_eq_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_eq_f16 vcc, vcc_lo, v255.h
+// W64: v_cmp_eq_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x02,0xd4,0x6a,0xfe,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_eq_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_eq_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_eq_f16 vcc, vcc_lo, v255.l
+// W64: v_cmp_eq_f16_e64 vcc, vcc_lo, v255.l    ; encoding: [0x6a,0x00,0x02,0xd4,0x6a,0xfe,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_eq_f16 vcc, vcc_hi, v255
-// W64: v_cmp_eq_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x02,0xd4,0x6b,0xfe,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16 vcc_lo, v1.h, v255.h
+// W32: v_cmp_eq_f16_e64 vcc_lo, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x02,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_f16 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_f16 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_f16 vcc_lo, v1.l, v255.l
+// W32: v_cmp_eq_f16_e64 vcc_lo, v1.l, v255.l   ; encoding: [0x6a,0x00,0x02,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_f16 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_f16 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_f16 vcc_lo, v127.h, v255.h
+// W32: v_cmp_eq_f16_e64 vcc_lo, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x02,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_f16 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_f16 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_f16 vcc_lo, v127.l, v255.l
+// W32: v_cmp_eq_f16_e64 vcc_lo, v127.l, v255.l ; encoding: [0x6a,0x00,0x02,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_eq_f16 vcc, vcc_lo, v255
-// W64: v_cmp_eq_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x02,0xd4,0x6a,0xfe,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_eq_f16 vcc_lo, v1, v255
-// W32: v_cmp_eq_f16_e64 vcc_lo, v1, v255       ; encoding: [0x6a,0x00,0x02,0xd4,0x01,0xff,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_eq_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_eq_f16 vcc_lo, v128.h, v2.h
+// W32: v_cmp_eq_f16_e64 vcc_lo, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x02,0xd4,0x80,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_eq_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_eq_f16 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x02,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_eq_f16 vcc_lo, v127, v255
-// W32: v_cmp_eq_f16_e64 vcc_lo, v127, v255     ; encoding: [0x6a,0x00,0x02,0xd4,0x7f,0xff,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x02,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_eq_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_eq_f16 vcc_lo, v128.l, v2.l
+// W32: v_cmp_eq_f16_e64 vcc_lo, v128.l, v2.l   ; encoding: [0x6a,0x00,0x02,0xd4,0x80,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_eq_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_eq_f16 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_eq_f16 vcc_lo, v128, v2
-// W32: v_cmp_eq_f16_e64 vcc_lo, v128, v2       ; encoding: [0x6a,0x00,0x02,0xd4,0x80,0x05,0x02,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_eq_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_eq_f16 vcc_lo, vcc_hi, v255.h
+// W32: v_cmp_eq_f16_e64 vcc_lo, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x02,0xd4,0x6b,0xfe,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_eq_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_eq_f16 vcc_lo, vcc_hi, v255.l
+// W32: v_cmp_eq_f16_e64 vcc_lo, vcc_hi, v255.l ; encoding: [0x6a,0x00,0x02,0xd4,0x6b,0xfe,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_eq_f16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_eq_f16_e64 vcc_lo, vcc_hi, v255   ; encoding: [0x6a,0x00,0x02,0xd4,0x6b,0xfe,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16 vcc_lo, vcc_lo, v255.h
+// W32: v_cmp_eq_f16_e64 vcc_lo, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x02,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_eq_f16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_eq_f16_e64 vcc_lo, vcc_lo, v255   ; encoding: [0x6a,0x00,0x02,0xd4,0x6a,0xfe,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_eq_f16 vcc_lo, vcc_lo, v255.l
+// W32: v_cmp_eq_f16_e64 vcc_lo, vcc_lo, v255.l ; encoding: [0x6a,0x00,0x02,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_eq_i16 vcc, v1.h, v255.h
 // W64: v_cmp_eq_i16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x32,0xd4,0x01,0xff,0x03,0x00]
@@ -520,93 +684,181 @@ v_cmp_eq_u16 vcc_lo, vcc_lo, v255.l
 // W32: v_cmp_eq_u16_e64 vcc_lo, vcc_lo, v255.l ; encoding: [0x6a,0x00,0x3a,0xd4,0x6a,0xfe,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_ge_f16 vcc, v1, v255
-// W64: v_cmp_ge_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x06,0xd4,0x01,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ge_f16 vcc, v1.h, v255.h
+// W64: v_cmp_ge_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x06,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc, v1.l, v255.l
+// W64: v_cmp_ge_f16_e64 vcc, v1.l, v255.l      ; encoding: [0x6a,0x00,0x06,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc, v127.h, v255.h
+// W64: v_cmp_ge_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x06,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc, v127.l, v255.l
+// W64: v_cmp_ge_f16_e64 vcc, v127.l, v255.l    ; encoding: [0x6a,0x00,0x06,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc, v128.h, v2.h
+// W64: v_cmp_ge_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x06,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x06,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_ge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ge_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x06,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_ge_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_ge_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_ge_f16 vcc, v128.l, v2.l
+// W64: v_cmp_ge_f16_e64 vcc, v128.l, v2.l      ; encoding: [0x6a,0x00,0x06,0xd4,0x80,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_ge_f16 vcc, v127, v255
-// W64: v_cmp_ge_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x06,0xd4,0x7f,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ge_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_ge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ge_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_ge_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_ge_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_ge_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_ge_f16 vcc, vcc_hi, v255.h
+// W64: v_cmp_ge_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x06,0xd4,0x6b,0xfe,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_ge_f16 vcc, v128, v2
-// W64: v_cmp_ge_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x06,0xd4,0x80,0x05,0x02,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ge_f16 vcc, vcc_hi, v255.l
+// W64: v_cmp_ge_f16_e64 vcc, vcc_hi, v255.l    ; encoding: [0x6a,0x00,0x06,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_ge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ge_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_ge_f16 vcc, vcc_lo, v255.h
+// W64: v_cmp_ge_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x06,0xd4,0x6a,0xfe,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_ge_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_ge_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_ge_f16 vcc, vcc_lo, v255.l
+// W64: v_cmp_ge_f16_e64 vcc, vcc_lo, v255.l    ; encoding: [0x6a,0x00,0x06,0xd4,0x6a,0xfe,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_ge_f16 vcc, vcc_hi, v255
-// W64: v_cmp_ge_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x06,0xd4,0x6b,0xfe,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ge_f16 vcc_lo, v1.h, v255.h
+// W32: v_cmp_ge_f16_e64 vcc_lo, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x06,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc_lo, v1.l, v255.l
+// W32: v_cmp_ge_f16_e64 vcc_lo, v1.l, v255.l   ; encoding: [0x6a,0x00,0x06,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc_lo, v127.h, v255.h
+// W32: v_cmp_ge_f16_e64 vcc_lo, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x06,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc_lo, v127.l, v255.l
+// W32: v_cmp_ge_f16_e64 vcc_lo, v127.l, v255.l ; encoding: [0x6a,0x00,0x06,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_ge_f16 vcc, vcc_lo, v255
-// W64: v_cmp_ge_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x06,0xd4,0x6a,0xfe,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ge_f16 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_ge_f16 vcc_lo, v1, v255
-// W32: v_cmp_ge_f16_e64 vcc_lo, v1, v255       ; encoding: [0x6a,0x00,0x06,0xd4,0x01,0xff,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ge_f16 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_ge_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ge_f16 vcc_lo, v128.h, v2.h
+// W32: v_cmp_ge_f16_e64 vcc_lo, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x06,0xd4,0x80,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_ge_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_ge_f16 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x06,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_ge_f16 vcc_lo, v127, v255
-// W32: v_cmp_ge_f16_e64 vcc_lo, v127, v255     ; encoding: [0x6a,0x00,0x06,0xd4,0x7f,0xff,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ge_f16 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x06,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_ge_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_ge_f16 vcc_lo, v128.l, v2.l
+// W32: v_cmp_ge_f16_e64 vcc_lo, v128.l, v2.l   ; encoding: [0x6a,0x00,0x06,0xd4,0x80,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_ge_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_ge_f16 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_ge_f16 vcc_lo, v128, v2
-// W32: v_cmp_ge_f16_e64 vcc_lo, v128, v2       ; encoding: [0x6a,0x00,0x06,0xd4,0x80,0x05,0x02,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ge_f16 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_ge_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_ge_f16 vcc_lo, vcc_hi, v255.h
+// W32: v_cmp_ge_f16_e64 vcc_lo, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x06,0xd4,0x6b,0xfe,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_ge_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_ge_f16 vcc_lo, vcc_hi, v255.l
+// W32: v_cmp_ge_f16_e64 vcc_lo, vcc_hi, v255.l ; encoding: [0x6a,0x00,0x06,0xd4,0x6b,0xfe,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_ge_f16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_ge_f16_e64 vcc_lo, vcc_hi, v255   ; encoding: [0x6a,0x00,0x06,0xd4,0x6b,0xfe,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ge_f16 vcc_lo, vcc_lo, v255.h
+// W32: v_cmp_ge_f16_e64 vcc_lo, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x06,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_ge_f16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_ge_f16_e64 vcc_lo, vcc_lo, v255   ; encoding: [0x6a,0x00,0x06,0xd4,0x6a,0xfe,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ge_f16 vcc_lo, vcc_lo, v255.l
+// W32: v_cmp_ge_f16_e64 vcc_lo, vcc_lo, v255.l ; encoding: [0x6a,0x00,0x06,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_ge_i16 vcc, v1.h, v255.h
 // W64: v_cmp_ge_i16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x36,0xd4,0x01,0xff,0x03,0x00]
@@ -960,93 +1212,181 @@ v_cmp_ge_u16 vcc_lo, vcc_lo, v255.l
 // W32: v_cmp_ge_u16_e64 vcc_lo, vcc_lo, v255.l ; encoding: [0x6a,0x00,0x3e,0xd4,0x6a,0xfe,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_gt_f16 vcc, v1, v255
-// W64: v_cmp_gt_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x04,0xd4,0x01,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_gt_f16 vcc, v1.h, v255.h
+// W64: v_cmp_gt_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x04,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc, v1.l, v255.l
+// W64: v_cmp_gt_f16_e64 vcc, v1.l, v255.l      ; encoding: [0x6a,0x00,0x04,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc, v127.h, v255.h
+// W64: v_cmp_gt_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x04,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc, v127.l, v255.l
+// W64: v_cmp_gt_f16_e64 vcc, v127.l, v255.l    ; encoding: [0x6a,0x00,0x04,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc, v128.h, v2.h
+// W64: v_cmp_gt_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x04,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x04,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_gt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_gt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x04,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_gt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_gt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_gt_f16 vcc, v128.l, v2.l
+// W64: v_cmp_gt_f16_e64 vcc, v128.l, v2.l      ; encoding: [0x6a,0x00,0x04,0xd4,0x80,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_gt_f16 vcc, v127, v255
-// W64: v_cmp_gt_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x04,0xd4,0x7f,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_gt_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_gt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_gt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_gt_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_gt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_gt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_gt_f16 vcc, vcc_hi, v255.h
+// W64: v_cmp_gt_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x04,0xd4,0x6b,0xfe,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_gt_f16 vcc, v128, v2
-// W64: v_cmp_gt_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x04,0xd4,0x80,0x05,0x02,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_gt_f16 vcc, vcc_hi, v255.l
+// W64: v_cmp_gt_f16_e64 vcc, vcc_hi, v255.l    ; encoding: [0x6a,0x00,0x04,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_gt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_gt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_gt_f16 vcc, vcc_lo, v255.h
+// W64: v_cmp_gt_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x04,0xd4,0x6a,0xfe,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_gt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_gt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_gt_f16 vcc, vcc_lo, v255.l
+// W64: v_cmp_gt_f16_e64 vcc, vcc_lo, v255.l    ; encoding: [0x6a,0x00,0x04,0xd4,0x6a,0xfe,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_gt_f16 vcc, vcc_hi, v255
-// W64: v_cmp_gt_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x04,0xd4,0x6b,0xfe,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_gt_f16 vcc_lo, v1.h, v255.h
+// W32: v_cmp_gt_f16_e64 vcc_lo, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x04,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc_lo, v1.l, v255.l
+// W32: v_cmp_gt_f16_e64 vcc_lo, v1.l, v255.l   ; encoding: [0x6a,0x00,0x04,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc_lo, v127.h, v255.h
+// W32: v_cmp_gt_f16_e64 vcc_lo, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x04,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc_lo, v127.l, v255.l
+// W32: v_cmp_gt_f16_e64 vcc_lo, v127.l, v255.l ; encoding: [0x6a,0x00,0x04,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_gt_f16 vcc, vcc_lo, v255
-// W64: v_cmp_gt_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x04,0xd4,0x6a,0xfe,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_gt_f16 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_gt_f16 vcc_lo, v1, v255
-// W32: v_cmp_gt_f16_e64 vcc_lo, v1, v255       ; encoding: [0x6a,0x00,0x04,0xd4,0x01,0xff,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_gt_f16 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_gt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_gt_f16 vcc_lo, v128.h, v2.h
+// W32: v_cmp_gt_f16_e64 vcc_lo, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x04,0xd4,0x80,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_gt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_gt_f16 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x04,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_gt_f16 vcc_lo, v127, v255
-// W32: v_cmp_gt_f16_e64 vcc_lo, v127, v255     ; encoding: [0x6a,0x00,0x04,0xd4,0x7f,0xff,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_gt_f16 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x04,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_gt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_gt_f16 vcc_lo, v128.l, v2.l
+// W32: v_cmp_gt_f16_e64 vcc_lo, v128.l, v2.l   ; encoding: [0x6a,0x00,0x04,0xd4,0x80,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_gt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_gt_f16 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_gt_f16 vcc_lo, v128, v2
-// W32: v_cmp_gt_f16_e64 vcc_lo, v128, v2       ; encoding: [0x6a,0x00,0x04,0xd4,0x80,0x05,0x02,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_gt_f16 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_gt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_gt_f16 vcc_lo, vcc_hi, v255.h
+// W32: v_cmp_gt_f16_e64 vcc_lo, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x04,0xd4,0x6b,0xfe,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_gt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_gt_f16 vcc_lo, vcc_hi, v255.l
+// W32: v_cmp_gt_f16_e64 vcc_lo, vcc_hi, v255.l ; encoding: [0x6a,0x00,0x04,0xd4,0x6b,0xfe,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_gt_f16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_gt_f16_e64 vcc_lo, vcc_hi, v255   ; encoding: [0x6a,0x00,0x04,0xd4,0x6b,0xfe,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_gt_f16 vcc_lo, vcc_lo, v255.h
+// W32: v_cmp_gt_f16_e64 vcc_lo, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x04,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_gt_f16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_gt_f16_e64 vcc_lo, vcc_lo, v255   ; encoding: [0x6a,0x00,0x04,0xd4,0x6a,0xfe,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_gt_f16 vcc_lo, vcc_lo, v255.l
+// W32: v_cmp_gt_f16_e64 vcc_lo, vcc_lo, v255.l ; encoding: [0x6a,0x00,0x04,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_gt_i16 vcc, v1.h, v255.h
 // W64: v_cmp_gt_i16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x34,0xd4,0x01,0xff,0x03,0x00]
@@ -1400,143 +1740,231 @@ v_cmp_gt_u16 vcc_lo, vcc_lo, v255.l
 // W32: v_cmp_gt_u16_e64 vcc_lo, vcc_lo, v255.l ; encoding: [0x6a,0x00,0x3c,0xd4,0x6a,0xfe,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_le_f16 vcc, v1, v255
-// W64: v_cmp_le_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x03,0xd4,0x01,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_le_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_le_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_le_f16 vcc, v1.h, v255.h
+// W64: v_cmp_le_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x03,0xd4,0x01,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_le_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_le_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_le_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_le_f16 vcc, v127, v255
-// W64: v_cmp_le_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x03,0xd4,0x7f,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_le_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_le_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_le_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_le_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_le_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_le_f16 vcc, v1.l, v255.l
+// W64: v_cmp_le_f16_e64 vcc, v1.l, v255.l      ; encoding: [0x6a,0x00,0x03,0xd4,0x01,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_le_f16 vcc, v128, v2
-// W64: v_cmp_le_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x03,0xd4,0x80,0x05,0x02,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_le_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_le_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_le_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_le_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_le_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_le_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_le_f16 vcc, vcc_hi, v255
-// W64: v_cmp_le_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x03,0xd4,0x6b,0xfe,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_le_f16 vcc, vcc_lo, v255
-// W64: v_cmp_le_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x03,0xd4,0x6a,0xfe,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_le_f16 vcc_lo, v1, v255
-// W32: v_cmp_le_f16_e64 vcc_lo, v1, v255       ; encoding: [0x6a,0x00,0x03,0xd4,0x01,0xff,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_le_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_le_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
-// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
-
-v_cmp_le_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_le_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
-// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
-
-v_cmp_le_f16 vcc_lo, v127, v255
-// W32: v_cmp_le_f16_e64 vcc_lo, v127, v255     ; encoding: [0x6a,0x00,0x03,0xd4,0x7f,0xff,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_le_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_le_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
-// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
-
-v_cmp_le_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_le_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
-// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
-
-v_cmp_le_f16 vcc_lo, v128, v2
-// W32: v_cmp_le_f16_e64 vcc_lo, v128, v2       ; encoding: [0x6a,0x00,0x03,0xd4,0x80,0x05,0x02,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_le_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_le_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
-// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+v_cmp_le_f16 vcc, v127.h, v255.h
+// W64: v_cmp_le_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x03,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_le_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_le_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
-// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+v_cmp_le_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_le_f16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_le_f16_e64 vcc_lo, vcc_hi, v255   ; encoding: [0x6a,0x00,0x03,0xd4,0x6b,0xfe,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_le_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_le_f16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_le_f16_e64 vcc_lo, vcc_lo, v255   ; encoding: [0x6a,0x00,0x03,0xd4,0x6a,0xfe,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_le_f16 vcc, v127.l, v255.l
+// W64: v_cmp_le_f16_e64 vcc, v127.l, v255.l    ; encoding: [0x6a,0x00,0x03,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_le_i16 vcc, v1.h, v255.h
-// W64: v_cmp_le_i16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x33,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_le_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_le_i16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_le_i16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x33,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_le_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_le_i16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
-// W64: v_cmp_le_i16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x33,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_le_f16 vcc, v128.h, v2.h
+// W64: v_cmp_le_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x03,0xd4,0x80,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_le_i16 vcc, v1.l, v255.l
-// W64: v_cmp_le_i16_e64 vcc, v1.l, v255.l      ; encoding: [0x6a,0x00,0x33,0xd4,0x01,0xff,0x03,0x00]
+v_cmp_le_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x03,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_le_i16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_le_i16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_le_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x03,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_le_i16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
-// W64: v_cmp_le_i16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_le_f16 vcc, v128.l, v2.l
+// W64: v_cmp_le_f16_e64 vcc, v128.l, v2.l      ; encoding: [0x6a,0x00,0x03,0xd4,0x80,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_le_i16 vcc, v127.h, v255.h
-// W64: v_cmp_le_i16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x33,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_le_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_le_i16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_le_i16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x33,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_le_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_le_i16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
-// W64: v_cmp_le_i16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x33,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_le_f16 vcc, vcc_hi, v255.h
+// W64: v_cmp_le_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x03,0xd4,0x6b,0xfe,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_le_i16 vcc, v127.l, v255.l
-// W64: v_cmp_le_i16_e64 vcc, v127.l, v255.l    ; encoding: [0x6a,0x00,0x33,0xd4,0x7f,0xff,0x03,0x00]
+v_cmp_le_f16 vcc, vcc_hi, v255.l
+// W64: v_cmp_le_f16_e64 vcc, vcc_hi, v255.l    ; encoding: [0x6a,0x00,0x03,0xd4,0x6b,0xfe,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_le_i16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_le_i16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_le_f16 vcc, vcc_lo, v255.h
+// W64: v_cmp_le_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x03,0xd4,0x6a,0xfe,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_le_i16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
-// W64: v_cmp_le_i16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_le_f16 vcc, vcc_lo, v255.l
+// W64: v_cmp_le_f16_e64 vcc, vcc_lo, v255.l    ; encoding: [0x6a,0x00,0x03,0xd4,0x6a,0xfe,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_le_i16 vcc, v128.h, v2.h
+v_cmp_le_f16 vcc_lo, v1.h, v255.h
+// W32: v_cmp_le_f16_e64 vcc_lo, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x03,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_f16 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp vcc_lo, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_f16 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp vcc_lo, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_f16 vcc_lo, v1.l, v255.l
+// W32: v_cmp_le_f16_e64 vcc_lo, v1.l, v255.l   ; encoding: [0x6a,0x00,0x03,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_f16 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_f16 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_f16 vcc_lo, v127.h, v255.h
+// W32: v_cmp_le_f16_e64 vcc_lo, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x03,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_f16 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp vcc_lo, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_f16 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp vcc_lo, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_f16 vcc_lo, v127.l, v255.l
+// W32: v_cmp_le_f16_e64 vcc_lo, v127.l, v255.l ; encoding: [0x6a,0x00,0x03,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_f16 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_f16 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_f16 vcc_lo, v128.h, v2.h
+// W32: v_cmp_le_f16_e64 vcc_lo, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x03,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_f16 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp vcc_lo, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x03,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_f16 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp vcc_lo, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x03,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_f16 vcc_lo, v128.l, v2.l
+// W32: v_cmp_le_f16_e64 vcc_lo, v128.l, v2.l   ; encoding: [0x6a,0x00,0x03,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_f16 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_f16 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_f16 vcc_lo, vcc_hi, v255.h
+// W32: v_cmp_le_f16_e64 vcc_lo, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x03,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_f16 vcc_lo, vcc_hi, v255.l
+// W32: v_cmp_le_f16_e64 vcc_lo, vcc_hi, v255.l ; encoding: [0x6a,0x00,0x03,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_f16 vcc_lo, vcc_lo, v255.h
+// W32: v_cmp_le_f16_e64 vcc_lo, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x03,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_f16 vcc_lo, vcc_lo, v255.l
+// W32: v_cmp_le_f16_e64 vcc_lo, vcc_lo, v255.l ; encoding: [0x6a,0x00,0x03,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_i16 vcc, v1.h, v255.h
+// W64: v_cmp_le_i16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x33,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_i16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_i16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x33,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_i16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_le_i16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x33,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_i16 vcc, v1.l, v255.l
+// W64: v_cmp_le_i16_e64 vcc, v1.l, v255.l      ; encoding: [0x6a,0x00,0x33,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_i16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_i16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_i16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_le_i16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_i16 vcc, v127.h, v255.h
+// W64: v_cmp_le_i16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x33,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_i16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_i16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x33,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_i16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_le_i16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x33,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_i16 vcc, v127.l, v255.l
+// W64: v_cmp_le_i16_e64 vcc, v127.l, v255.l    ; encoding: [0x6a,0x00,0x33,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_i16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_i16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_i16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_le_i16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_i16 vcc, v128.h, v2.h
 // W64: v_cmp_le_i16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x33,0xd4,0x80,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
@@ -1840,93 +2268,181 @@ v_cmp_le_u16 vcc_lo, vcc_lo, v255.l
 // W32: v_cmp_le_u16_e64 vcc_lo, vcc_lo, v255.l ; encoding: [0x6a,0x00,0x3b,0xd4,0x6a,0xfe,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_lg_f16 vcc, v1, v255
-// W64: v_cmp_lg_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x05,0xd4,0x01,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16 vcc, v1.h, v255.h
+// W64: v_cmp_lg_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x05,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc, v1.l, v255.l
+// W64: v_cmp_lg_f16_e64 vcc, v1.l, v255.l      ; encoding: [0x6a,0x00,0x05,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc, v127.h, v255.h
+// W64: v_cmp_lg_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x05,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc, v127.l, v255.l
+// W64: v_cmp_lg_f16_e64 vcc, v127.l, v255.l    ; encoding: [0x6a,0x00,0x05,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc, v128.h, v2.h
+// W64: v_cmp_lg_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x05,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x05,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_lg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_lg_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x05,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_lg_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_lg_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_lg_f16 vcc, v128.l, v2.l
+// W64: v_cmp_lg_f16_e64 vcc, v128.l, v2.l      ; encoding: [0x6a,0x00,0x05,0xd4,0x80,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_lg_f16 vcc, v127, v255
-// W64: v_cmp_lg_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x05,0xd4,0x7f,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_lg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_lg_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_lg_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_lg_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_lg_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_lg_f16 vcc, vcc_hi, v255.h
+// W64: v_cmp_lg_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x05,0xd4,0x6b,0xfe,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_lg_f16 vcc, v128, v2
-// W64: v_cmp_lg_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x05,0xd4,0x80,0x05,0x02,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16 vcc, vcc_hi, v255.l
+// W64: v_cmp_lg_f16_e64 vcc, vcc_hi, v255.l    ; encoding: [0x6a,0x00,0x05,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_lg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_lg_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_lg_f16 vcc, vcc_lo, v255.h
+// W64: v_cmp_lg_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x05,0xd4,0x6a,0xfe,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_lg_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_lg_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_lg_f16 vcc, vcc_lo, v255.l
+// W64: v_cmp_lg_f16_e64 vcc, vcc_lo, v255.l    ; encoding: [0x6a,0x00,0x05,0xd4,0x6a,0xfe,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_lg_f16 vcc, vcc_hi, v255
-// W64: v_cmp_lg_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x05,0xd4,0x6b,0xfe,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16 vcc_lo, v1.h, v255.h
+// W32: v_cmp_lg_f16_e64 vcc_lo, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x05,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc_lo, v1.l, v255.l
+// W32: v_cmp_lg_f16_e64 vcc_lo, v1.l, v255.l   ; encoding: [0x6a,0x00,0x05,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc_lo, v127.h, v255.h
+// W32: v_cmp_lg_f16_e64 vcc_lo, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x05,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc_lo, v127.l, v255.l
+// W32: v_cmp_lg_f16_e64 vcc_lo, v127.l, v255.l ; encoding: [0x6a,0x00,0x05,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_lg_f16 vcc, vcc_lo, v255
-// W64: v_cmp_lg_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x05,0xd4,0x6a,0xfe,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_lg_f16 vcc_lo, v1, v255
-// W32: v_cmp_lg_f16_e64 vcc_lo, v1, v255       ; encoding: [0x6a,0x00,0x05,0xd4,0x01,0xff,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_lg_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_lg_f16 vcc_lo, v128.h, v2.h
+// W32: v_cmp_lg_f16_e64 vcc_lo, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x05,0xd4,0x80,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_lg_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_lg_f16 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x05,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_lg_f16 vcc_lo, v127, v255
-// W32: v_cmp_lg_f16_e64 vcc_lo, v127, v255     ; encoding: [0x6a,0x00,0x05,0xd4,0x7f,0xff,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x05,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_lg_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_lg_f16 vcc_lo, v128.l, v2.l
+// W32: v_cmp_lg_f16_e64 vcc_lo, v128.l, v2.l   ; encoding: [0x6a,0x00,0x05,0xd4,0x80,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_lg_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_lg_f16 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_lg_f16 vcc_lo, v128, v2
-// W32: v_cmp_lg_f16_e64 vcc_lo, v128, v2       ; encoding: [0x6a,0x00,0x05,0xd4,0x80,0x05,0x02,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_lg_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_lg_f16 vcc_lo, vcc_hi, v255.h
+// W32: v_cmp_lg_f16_e64 vcc_lo, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x05,0xd4,0x6b,0xfe,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_lg_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_lg_f16 vcc_lo, vcc_hi, v255.l
+// W32: v_cmp_lg_f16_e64 vcc_lo, vcc_hi, v255.l ; encoding: [0x6a,0x00,0x05,0xd4,0x6b,0xfe,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_lg_f16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_lg_f16_e64 vcc_lo, vcc_hi, v255   ; encoding: [0x6a,0x00,0x05,0xd4,0x6b,0xfe,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16 vcc_lo, vcc_lo, v255.h
+// W32: v_cmp_lg_f16_e64 vcc_lo, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x05,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_lg_f16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_lg_f16_e64 vcc_lo, vcc_lo, v255   ; encoding: [0x6a,0x00,0x05,0xd4,0x6a,0xfe,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_lg_f16 vcc_lo, vcc_lo, v255.l
+// W32: v_cmp_lg_f16_e64 vcc_lo, vcc_lo, v255.l ; encoding: [0x6a,0x00,0x05,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_lt_f16 vcc, v1.h, v255.h
 // W64: v_cmp_lt_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x01,0xd4,0x01,0xff,0x03,0x00]
@@ -2808,706 +3324,1410 @@ v_cmp_ne_u16 vcc_lo, vcc_lo, v255.l
 // W32: v_cmp_ne_u16_e64 vcc_lo, vcc_lo, v255.l ; encoding: [0x6a,0x00,0x3d,0xd4,0x6a,0xfe,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_neq_f16 vcc, v1, v255
-// W64: v_cmp_neq_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0d,0xd4,0x01,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_neq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_neq_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16 vcc, v1.h, v255.h
+// W64: v_cmp_neq_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0d,0xd4,0x01,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_neq_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_neq_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_neq_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_neq_f16 vcc, v127, v255
-// W64: v_cmp_neq_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0d,0xd4,0x7f,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_neq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_neq_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_neq_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_neq_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_neq_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_neq_f16 vcc, v1.l, v255.l
+// W64: v_cmp_neq_f16_e64 vcc, v1.l, v255.l     ; encoding: [0x6a,0x00,0x0d,0xd4,0x01,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_neq_f16 vcc, v128, v2
-// W64: v_cmp_neq_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0d,0xd4,0x80,0x05,0x02,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_neq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_neq_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_neq_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_neq_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_neq_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_neq_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_neq_f16 vcc, vcc_hi, v255
-// W64: v_cmp_neq_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0d,0xd4,0x6b,0xfe,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_neq_f16 vcc, vcc_lo, v255
-// W64: v_cmp_neq_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0d,0xd4,0x6a,0xfe,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_neq_f16 vcc_lo, v1, v255
-// W32: v_cmp_neq_f16_e64 vcc_lo, v1, v255      ; encoding: [0x6a,0x00,0x0d,0xd4,0x01,0xff,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_neq_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
-// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
-
-v_cmp_neq_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
-// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
-
-v_cmp_neq_f16 vcc_lo, v127, v255
-// W32: v_cmp_neq_f16_e64 vcc_lo, v127, v255    ; encoding: [0x6a,0x00,0x0d,0xd4,0x7f,0xff,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_neq_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
-// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
-
-v_cmp_neq_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
-// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
-
-v_cmp_neq_f16 vcc_lo, v128, v2
-// W32: v_cmp_neq_f16_e64 vcc_lo, v128, v2      ; encoding: [0x6a,0x00,0x0d,0xd4,0x80,0x05,0x02,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_neq_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
-// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+v_cmp_neq_f16 vcc, v127.h, v255.h
+// W64: v_cmp_neq_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0d,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_neq_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
-// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+v_cmp_neq_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_neq_f16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_neq_f16_e64 vcc_lo, vcc_hi, v255  ; encoding: [0x6a,0x00,0x0d,0xd4,0x6b,0xfe,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_neq_f16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_neq_f16_e64 vcc_lo, vcc_lo, v255  ; encoding: [0x6a,0x00,0x0d,0xd4,0x6a,0xfe,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16 vcc, v127.l, v255.l
+// W64: v_cmp_neq_f16_e64 vcc, v127.l, v255.l   ; encoding: [0x6a,0x00,0x0d,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nge_f16 vcc, v1, v255
-// W64: v_cmp_nge_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x09,0xd4,0x01,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nge_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nge_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_nge_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_neq_f16 vcc, v128.h, v2.h
+// W64: v_cmp_neq_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0d,0xd4,0x80,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nge_f16 vcc, v127, v255
-// W64: v_cmp_nge_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x09,0xd4,0x7f,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nge_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_neq_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nge_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_nge_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_neq_f16 vcc, v128.l, v2.l
+// W64: v_cmp_neq_f16_e64 vcc, v128.l, v2.l     ; encoding: [0x6a,0x00,0x0d,0xd4,0x80,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nge_f16 vcc, v128, v2
-// W64: v_cmp_nge_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x09,0xd4,0x80,0x05,0x02,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nge_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_neq_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 // W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nge_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_nge_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_neq_f16 vcc, vcc_hi, v255.h
+// W64: v_cmp_neq_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0d,0xd4,0x6b,0xfe,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nge_f16 vcc, vcc_hi, v255
-// W64: v_cmp_nge_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x09,0xd4,0x6b,0xfe,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16 vcc, vcc_hi, v255.l
+// W64: v_cmp_neq_f16_e64 vcc, vcc_hi, v255.l   ; encoding: [0x6a,0x00,0x0d,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nge_f16 vcc, vcc_lo, v255
-// W64: v_cmp_nge_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x09,0xd4,0x6a,0xfe,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16 vcc, vcc_lo, v255.h
+// W64: v_cmp_neq_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0d,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nge_f16 vcc_lo, v1, v255
-// W32: v_cmp_nge_f16_e64 vcc_lo, v1, v255      ; encoding: [0x6a,0x00,0x09,0xd4,0x01,0xff,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16 vcc, vcc_lo, v255.l
+// W64: v_cmp_neq_f16_e64 vcc, vcc_lo, v255.l   ; encoding: [0x6a,0x00,0x0d,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nge_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16 vcc_lo, v1.h, v255.h
+// W32: v_cmp_neq_f16_e64 vcc_lo, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0d,0xd4,0x01,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nge_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_neq_f16 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nge_f16 vcc_lo, v127, v255
-// W32: v_cmp_nge_f16_e64 vcc_lo, v127, v255    ; encoding: [0x6a,0x00,0x09,0xd4,0x7f,0xff,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_nge_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_neq_f16 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nge_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_neq_f16 vcc_lo, v1.l, v255.l
+// W32: v_cmp_neq_f16_e64 vcc_lo, v1.l, v255.l  ; encoding: [0x6a,0x00,0x0d,0xd4,0x01,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nge_f16 vcc_lo, v128, v2
-// W32: v_cmp_nge_f16_e64 vcc_lo, v128, v2      ; encoding: [0x6a,0x00,0x09,0xd4,0x80,0x05,0x02,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_nge_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_neq_f16 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nge_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_neq_f16 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nge_f16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_nge_f16_e64 vcc_lo, vcc_hi, v255  ; encoding: [0x6a,0x00,0x09,0xd4,0x6b,0xfe,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_nge_f16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_nge_f16_e64 vcc_lo, vcc_lo, v255  ; encoding: [0x6a,0x00,0x09,0xd4,0x6a,0xfe,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_ngt_f16 vcc, v1, v255
-// W64: v_cmp_ngt_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0b,0xd4,0x01,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_ngt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ngt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
-// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
-
-v_cmp_ngt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_ngt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
-// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
-
-v_cmp_ngt_f16 vcc, v127, v255
-// W64: v_cmp_ngt_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0b,0xd4,0x7f,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
-
-v_cmp_ngt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ngt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
-// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+v_cmp_neq_f16 vcc_lo, v127.h, v255.h
+// W32: v_cmp_neq_f16_e64 vcc_lo, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0d,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_ngt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_ngt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
-// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+v_cmp_neq_f16 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_ngt_f16 vcc, v128, v2
-// W64: v_cmp_ngt_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0b,0xd4,0x80,0x05,0x02,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_ngt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ngt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
-// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+v_cmp_neq_f16 vcc_lo, v127.l, v255.l
+// W32: v_cmp_neq_f16_e64 vcc_lo, v127.l, v255.l ; encoding: [0x6a,0x00,0x0d,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_ngt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_ngt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
-// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+v_cmp_neq_f16 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_ngt_f16 vcc, vcc_hi, v255
-// W64: v_cmp_ngt_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0b,0xd4,0x6b,0xfe,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_ngt_f16 vcc, vcc_lo, v255
-// W64: v_cmp_ngt_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0b,0xd4,0x6a,0xfe,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16 vcc_lo, v128.h, v2.h
+// W32: v_cmp_neq_f16_e64 vcc_lo, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0d,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_ngt_f16 vcc_lo, v1, v255
-// W32: v_cmp_ngt_f16_e64 vcc_lo, v1, v255      ; encoding: [0x6a,0x00,0x0b,0xd4,0x01,0xff,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_ngt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_neq_f16 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_ngt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_neq_f16 vcc_lo, v128.l, v2.l
+// W32: v_cmp_neq_f16_e64 vcc_lo, v128.l, v2.l  ; encoding: [0x6a,0x00,0x0d,0xd4,0x80,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_ngt_f16 vcc_lo, v127, v255
-// W32: v_cmp_ngt_f16_e64 vcc_lo, v127, v255    ; encoding: [0x6a,0x00,0x0b,0xd4,0x7f,0xff,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_ngt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_neq_f16 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_ngt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_neq_f16 vcc_lo, vcc_hi, v255.h
+// W32: v_cmp_neq_f16_e64 vcc_lo, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0d,0xd4,0x6b,0xfe,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_ngt_f16 vcc_lo, v128, v2
-// W32: v_cmp_ngt_f16_e64 vcc_lo, v128, v2      ; encoding: [0x6a,0x00,0x0b,0xd4,0x80,0x05,0x02,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_neq_f16 vcc_lo, vcc_hi, v255.l
+// W32: v_cmp_neq_f16_e64 vcc_lo, vcc_hi, v255.l ; encoding: [0x6a,0x00,0x0d,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_ngt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_neq_f16 vcc_lo, vcc_lo, v255.h
+// W32: v_cmp_neq_f16_e64 vcc_lo, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0d,0xd4,0x6a,0xfe,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_ngt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_neq_f16 vcc_lo, vcc_lo, v255.l
+// W32: v_cmp_neq_f16_e64 vcc_lo, vcc_lo, v255.l ; encoding: [0x6a,0x00,0x0d,0xd4,0x6a,0xfe,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_ngt_f16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_ngt_f16_e64 vcc_lo, vcc_hi, v255  ; encoding: [0x6a,0x00,0x0b,0xd4,0x6b,0xfe,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16 vcc, v1.h, v255.h
+// W64: v_cmp_nge_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x09,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_ngt_f16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_ngt_f16_e64 vcc_lo, vcc_lo, v255  ; encoding: [0x6a,0x00,0x0b,0xd4,0x6a,0xfe,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nle_f16 vcc, v1, v255
-// W64: v_cmp_nle_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0c,0xd4,0x01,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nle_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nle_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16 vcc, v1.l, v255.l
+// W64: v_cmp_nge_f16_e64 vcc, v1.l, v255.l     ; encoding: [0x6a,0x00,0x09,0xd4,0x01,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nle_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_nle_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nge_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nle_f16 vcc, v127, v255
-// W64: v_cmp_nle_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0c,0xd4,0x7f,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nle_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nle_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_nge_f16 vcc, v127.h, v255.h
+// W64: v_cmp_nge_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x09,0xd4,0x7f,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nle_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_nle_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_nge_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nle_f16 vcc, v128, v2
-// W64: v_cmp_nle_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0c,0xd4,0x80,0x05,0x02,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nle_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nle_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_nge_f16 vcc, v127.l, v255.l
+// W64: v_cmp_nge_f16_e64 vcc, v127.l, v255.l   ; encoding: [0x6a,0x00,0x09,0xd4,0x7f,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nle_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_nle_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_nge_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nle_f16 vcc, vcc_hi, v255
-// W64: v_cmp_nle_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0c,0xd4,0x6b,0xfe,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nle_f16 vcc, vcc_lo, v255
-// W64: v_cmp_nle_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0c,0xd4,0x6a,0xfe,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16 vcc, v128.h, v2.h
+// W64: v_cmp_nge_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x09,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nle_f16 vcc_lo, v1, v255
-// W32: v_cmp_nle_f16_e64 vcc_lo, v1, v255      ; encoding: [0x6a,0x00,0x0c,0xd4,0x01,0xff,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x09,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nle_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
-// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+v_cmp_nge_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x09,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nle_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
-// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+v_cmp_nge_f16 vcc, v128.l, v2.l
+// W64: v_cmp_nge_f16_e64 vcc, v128.l, v2.l     ; encoding: [0x6a,0x00,0x09,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nle_f16 vcc_lo, v127, v255
-// W32: v_cmp_nle_f16_e64 vcc_lo, v127, v255    ; encoding: [0x6a,0x00,0x0c,0xd4,0x7f,0xff,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nle_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
-// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+v_cmp_nge_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nle_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
-// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+v_cmp_nge_f16 vcc, vcc_hi, v255.h
+// W64: v_cmp_nge_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x09,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nle_f16 vcc_lo, v128, v2
-// W32: v_cmp_nle_f16_e64 vcc_lo, v128, v2      ; encoding: [0x6a,0x00,0x0c,0xd4,0x80,0x05,0x02,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16 vcc, vcc_hi, v255.l
+// W64: v_cmp_nge_f16_e64 vcc, vcc_hi, v255.l   ; encoding: [0x6a,0x00,0x09,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nle_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
-// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+v_cmp_nge_f16 vcc, vcc_lo, v255.h
+// W64: v_cmp_nge_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x09,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nle_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
-// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+v_cmp_nge_f16 vcc, vcc_lo, v255.l
+// W64: v_cmp_nge_f16_e64 vcc, vcc_lo, v255.l   ; encoding: [0x6a,0x00,0x09,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nle_f16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_nle_f16_e64 vcc_lo, vcc_hi, v255  ; encoding: [0x6a,0x00,0x0c,0xd4,0x6b,0xfe,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16 vcc_lo, v1.h, v255.h
+// W32: v_cmp_nge_f16_e64 vcc_lo, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x09,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nle_f16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_nle_f16_e64 vcc_lo, vcc_lo, v255  ; encoding: [0x6a,0x00,0x0c,0xd4,0x6a,0xfe,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlg_f16 vcc, v1, v255
-// W64: v_cmp_nlg_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0a,0xd4,0x01,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlg_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
-// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+v_cmp_nge_f16 vcc_lo, v1.l, v255.l
+// W32: v_cmp_nge_f16_e64 vcc_lo, v1.l, v255.l  ; encoding: [0x6a,0x00,0x09,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlg_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_nlg_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
-// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+v_cmp_nge_f16 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlg_f16 vcc, v127, v255
-// W64: v_cmp_nlg_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0a,0xd4,0x7f,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlg_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
-// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+v_cmp_nge_f16 vcc_lo, v127.h, v255.h
+// W32: v_cmp_nge_f16_e64 vcc_lo, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x09,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlg_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_nlg_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
-// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+v_cmp_nge_f16 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlg_f16 vcc, v128, v2
-// W64: v_cmp_nlg_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0a,0xd4,0x80,0x05,0x02,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlg_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
-// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+v_cmp_nge_f16 vcc_lo, v127.l, v255.l
+// W32: v_cmp_nge_f16_e64 vcc_lo, v127.l, v255.l ; encoding: [0x6a,0x00,0x09,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlg_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_nlg_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
-// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+v_cmp_nge_f16 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlg_f16 vcc, vcc_hi, v255
-// W64: v_cmp_nlg_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0a,0xd4,0x6b,0xfe,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlg_f16 vcc, vcc_lo, v255
-// W64: v_cmp_nlg_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0a,0xd4,0x6a,0xfe,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16 vcc_lo, v128.h, v2.h
+// W32: v_cmp_nge_f16_e64 vcc_lo, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x09,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlg_f16 vcc_lo, v1, v255
-// W32: v_cmp_nlg_f16_e64 vcc_lo, v1, v255      ; encoding: [0x6a,0x00,0x0a,0xd4,0x01,0xff,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x09,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlg_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nge_f16 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x09,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlg_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_nge_f16 vcc_lo, v128.l, v2.l
+// W32: v_cmp_nge_f16_e64 vcc_lo, v128.l, v2.l  ; encoding: [0x6a,0x00,0x09,0xd4,0x80,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlg_f16 vcc_lo, v127, v255
-// W32: v_cmp_nlg_f16_e64 vcc_lo, v127, v255    ; encoding: [0x6a,0x00,0x0a,0xd4,0x7f,0xff,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlg_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_nge_f16 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlg_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_nge_f16 vcc_lo, vcc_hi, v255.h
+// W32: v_cmp_nge_f16_e64 vcc_lo, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x09,0xd4,0x6b,0xfe,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlg_f16 vcc_lo, v128, v2
-// W32: v_cmp_nlg_f16_e64 vcc_lo, v128, v2      ; encoding: [0x6a,0x00,0x0a,0xd4,0x80,0x05,0x02,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nge_f16 vcc_lo, vcc_hi, v255.l
+// W32: v_cmp_nge_f16_e64 vcc_lo, vcc_hi, v255.l ; encoding: [0x6a,0x00,0x09,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlg_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_nge_f16 vcc_lo, vcc_lo, v255.h
+// W32: v_cmp_nge_f16_e64 vcc_lo, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x09,0xd4,0x6a,0xfe,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlg_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_nge_f16 vcc_lo, vcc_lo, v255.l
+// W32: v_cmp_nge_f16_e64 vcc_lo, vcc_lo, v255.l ; encoding: [0x6a,0x00,0x09,0xd4,0x6a,0xfe,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlg_f16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_nlg_f16_e64 vcc_lo, vcc_hi, v255  ; encoding: [0x6a,0x00,0x0a,0xd4,0x6b,0xfe,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ngt_f16 vcc, v1.h, v255.h
+// W64: v_cmp_ngt_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0b,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlg_f16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_nlg_f16_e64 vcc_lo, vcc_lo, v255  ; encoding: [0x6a,0x00,0x0a,0xd4,0x6a,0xfe,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ngt_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlt_f16 vcc, v1, v255
-// W64: v_cmp_nlt_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0e,0xd4,0x01,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ngt_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_ngt_f16 vcc, v1.l, v255.l
+// W64: v_cmp_ngt_f16_e64 vcc, v1.l, v255.l     ; encoding: [0x6a,0x00,0x0b,0xd4,0x01,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_nlt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_ngt_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlt_f16 vcc, v127, v255
-// W64: v_cmp_nlt_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0e,0xd4,0x7f,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ngt_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_ngt_f16 vcc, v127.h, v255.h
+// W64: v_cmp_ngt_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0b,0xd4,0x7f,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_nlt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_ngt_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlt_f16 vcc, v128, v2
-// W64: v_cmp_nlt_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0e,0xd4,0x80,0x05,0x02,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ngt_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_ngt_f16 vcc, v127.l, v255.l
+// W64: v_cmp_ngt_f16_e64 vcc, v127.l, v255.l   ; encoding: [0x6a,0x00,0x0b,0xd4,0x7f,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_nlt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_ngt_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlt_f16 vcc, vcc_hi, v255
-// W64: v_cmp_nlt_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0e,0xd4,0x6b,0xfe,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ngt_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlt_f16 vcc, vcc_lo, v255
-// W64: v_cmp_nlt_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0e,0xd4,0x6a,0xfe,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ngt_f16 vcc, v128.h, v2.h
+// W64: v_cmp_ngt_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0b,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlt_f16 vcc_lo, v1, v255
-// W32: v_cmp_nlt_f16_e64 vcc_lo, v1, v255      ; encoding: [0x6a,0x00,0x0e,0xd4,0x01,0xff,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ngt_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
-// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+v_cmp_ngt_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
-// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+v_cmp_ngt_f16 vcc, v128.l, v2.l
+// W64: v_cmp_ngt_f16_e64 vcc, v128.l, v2.l     ; encoding: [0x6a,0x00,0x0b,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlt_f16 vcc_lo, v127, v255
-// W32: v_cmp_nlt_f16_e64 vcc_lo, v127, v255    ; encoding: [0x6a,0x00,0x0e,0xd4,0x7f,0xff,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_ngt_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_ngt_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc, vcc_hi, v255.h
+// W64: v_cmp_ngt_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0b,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc, vcc_hi, v255.l
+// W64: v_cmp_ngt_f16_e64 vcc, vcc_hi, v255.l   ; encoding: [0x6a,0x00,0x0b,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc, vcc_lo, v255.h
+// W64: v_cmp_ngt_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0b,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc, vcc_lo, v255.l
+// W64: v_cmp_ngt_f16_e64 vcc, vcc_lo, v255.l   ; encoding: [0x6a,0x00,0x0b,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc_lo, v1.h, v255.h
+// W32: v_cmp_ngt_f16_e64 vcc_lo, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0b,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc_lo, v1.l, v255.l
+// W32: v_cmp_ngt_f16_e64 vcc_lo, v1.l, v255.l  ; encoding: [0x6a,0x00,0x0b,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc_lo, v127.h, v255.h
+// W32: v_cmp_ngt_f16_e64 vcc_lo, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0b,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc_lo, v127.l, v255.l
+// W32: v_cmp_ngt_f16_e64 vcc_lo, v127.l, v255.l ; encoding: [0x6a,0x00,0x0b,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc_lo, v128.h, v2.h
+// W32: v_cmp_ngt_f16_e64 vcc_lo, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0b,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc_lo, v128.l, v2.l
+// W32: v_cmp_ngt_f16_e64 vcc_lo, v128.l, v2.l  ; encoding: [0x6a,0x00,0x0b,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc_lo, vcc_hi, v255.h
+// W32: v_cmp_ngt_f16_e64 vcc_lo, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0b,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc_lo, vcc_hi, v255.l
+// W32: v_cmp_ngt_f16_e64 vcc_lo, vcc_hi, v255.l ; encoding: [0x6a,0x00,0x0b,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc_lo, vcc_lo, v255.h
+// W32: v_cmp_ngt_f16_e64 vcc_lo, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0b,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc_lo, vcc_lo, v255.l
+// W32: v_cmp_ngt_f16_e64 vcc_lo, vcc_lo, v255.l ; encoding: [0x6a,0x00,0x0b,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc, v1.h, v255.h
+// W64: v_cmp_nle_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0c,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc, v1.l, v255.l
+// W64: v_cmp_nle_f16_e64 vcc, v1.l, v255.l     ; encoding: [0x6a,0x00,0x0c,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc, v127.h, v255.h
+// W64: v_cmp_nle_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0c,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc, v127.l, v255.l
+// W64: v_cmp_nle_f16_e64 vcc, v127.l, v255.l   ; encoding: [0x6a,0x00,0x0c,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc, v128.h, v2.h
+// W64: v_cmp_nle_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0c,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc, v128.l, v2.l
+// W64: v_cmp_nle_f16_e64 vcc, v128.l, v2.l     ; encoding: [0x6a,0x00,0x0c,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc, vcc_hi, v255.h
+// W64: v_cmp_nle_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0c,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc, vcc_hi, v255.l
+// W64: v_cmp_nle_f16_e64 vcc, vcc_hi, v255.l   ; encoding: [0x6a,0x00,0x0c,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc, vcc_lo, v255.h
+// W64: v_cmp_nle_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0c,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc, vcc_lo, v255.l
+// W64: v_cmp_nle_f16_e64 vcc, vcc_lo, v255.l   ; encoding: [0x6a,0x00,0x0c,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc_lo, v1.h, v255.h
+// W32: v_cmp_nle_f16_e64 vcc_lo, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0c,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc_lo, v1.l, v255.l
+// W32: v_cmp_nle_f16_e64 vcc_lo, v1.l, v255.l  ; encoding: [0x6a,0x00,0x0c,0xd4,0x01,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_nle_f16 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlt_f16 vcc_lo, v128, v2
-// W32: v_cmp_nlt_f16_e64 vcc_lo, v128, v2      ; encoding: [0x6a,0x00,0x0e,0xd4,0x80,0x05,0x02,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nle_f16 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc_lo, v127.h, v255.h
+// W32: v_cmp_nle_f16_e64 vcc_lo, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0c,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc_lo, v127.l, v255.l
+// W32: v_cmp_nle_f16_e64 vcc_lo, v127.l, v255.l ; encoding: [0x6a,0x00,0x0c,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc_lo, v128.h, v2.h
+// W32: v_cmp_nle_f16_e64 vcc_lo, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0c,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc_lo, v128.l, v2.l
+// W32: v_cmp_nle_f16_e64 vcc_lo, v128.l, v2.l  ; encoding: [0x6a,0x00,0x0c,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc_lo, vcc_hi, v255.h
+// W32: v_cmp_nle_f16_e64 vcc_lo, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0c,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc_lo, vcc_hi, v255.l
+// W32: v_cmp_nle_f16_e64 vcc_lo, vcc_hi, v255.l ; encoding: [0x6a,0x00,0x0c,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc_lo, vcc_lo, v255.h
+// W32: v_cmp_nle_f16_e64 vcc_lo, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0c,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc_lo, vcc_lo, v255.l
+// W32: v_cmp_nle_f16_e64 vcc_lo, vcc_lo, v255.l ; encoding: [0x6a,0x00,0x0c,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc, v1.h, v255.h
+// W64: v_cmp_nlg_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0a,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc, v1.l, v255.l
+// W64: v_cmp_nlg_f16_e64 vcc, v1.l, v255.l     ; encoding: [0x6a,0x00,0x0a,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc, v127.h, v255.h
+// W64: v_cmp_nlg_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0a,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc, v127.l, v255.l
+// W64: v_cmp_nlg_f16_e64 vcc, v127.l, v255.l   ; encoding: [0x6a,0x00,0x0a,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc, v128.h, v2.h
+// W64: v_cmp_nlg_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0a,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc, v128.l, v2.l
+// W64: v_cmp_nlg_f16_e64 vcc, v128.l, v2.l     ; encoding: [0x6a,0x00,0x0a,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc, vcc_hi, v255.h
+// W64: v_cmp_nlg_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0a,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc, vcc_hi, v255.l
+// W64: v_cmp_nlg_f16_e64 vcc, vcc_hi, v255.l   ; encoding: [0x6a,0x00,0x0a,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc, vcc_lo, v255.h
+// W64: v_cmp_nlg_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0a,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc, vcc_lo, v255.l
+// W64: v_cmp_nlg_f16_e64 vcc, vcc_lo, v255.l   ; encoding: [0x6a,0x00,0x0a,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc_lo, v1.h, v255.h
+// W32: v_cmp_nlg_f16_e64 vcc_lo, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0a,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc_lo, v1.l, v255.l
+// W32: v_cmp_nlg_f16_e64 vcc_lo, v1.l, v255.l  ; encoding: [0x6a,0x00,0x0a,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc_lo, v127.h, v255.h
+// W32: v_cmp_nlg_f16_e64 vcc_lo, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0a,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_nlg_f16 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_nlg_f16 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 // W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlt_f16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_nlt_f16_e64 vcc_lo, vcc_hi, v255  ; encoding: [0x6a,0x00,0x0e,0xd4,0x6b,0xfe,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlg_f16 vcc_lo, v127.l, v255.l
+// W32: v_cmp_nlg_f16_e64 vcc_lo, v127.l, v255.l ; encoding: [0x6a,0x00,0x0a,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_nlt_f16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_nlt_f16_e64 vcc_lo, vcc_lo, v255  ; encoding: [0x6a,0x00,0x0e,0xd4,0x6a,0xfe,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlg_f16 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_o_f16 vcc, v1, v255
-// W64: v_cmp_o_f16_e64 vcc, v1, v255           ; encoding: [0x6a,0x00,0x07,0xd4,0x01,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_nlg_f16 vcc_lo, v128.h, v2.h
+// W32: v_cmp_nlg_f16_e64 vcc_lo, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0a,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc_lo, v128.l, v2.l
+// W32: v_cmp_nlg_f16_e64 vcc_lo, v128.l, v2.l  ; encoding: [0x6a,0x00,0x0a,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
-v_cmp_o_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_o_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_nlg_f16 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc_lo, vcc_hi, v255.h
+// W32: v_cmp_nlg_f16_e64 vcc_lo, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0a,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc_lo, vcc_hi, v255.l
+// W32: v_cmp_nlg_f16_e64 vcc_lo, vcc_hi, v255.l ; encoding: [0x6a,0x00,0x0a,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc_lo, vcc_lo, v255.h
+// W32: v_cmp_nlg_f16_e64 vcc_lo, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0a,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc_lo, vcc_lo, v255.l
+// W32: v_cmp_nlg_f16_e64 vcc_lo, vcc_lo, v255.l ; encoding: [0x6a,0x00,0x0a,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc, v1.h, v255.h
+// W64: v_cmp_nlt_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0e,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc, v1.l, v255.l
+// W64: v_cmp_nlt_f16_e64 vcc, v1.l, v255.l     ; encoding: [0x6a,0x00,0x0e,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc, v127.h, v255.h
+// W64: v_cmp_nlt_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0e,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc, v127.l, v255.l
+// W64: v_cmp_nlt_f16_e64 vcc, v127.l, v255.l   ; encoding: [0x6a,0x00,0x0e,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc, v128.h, v2.h
+// W64: v_cmp_nlt_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0e,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc, v128.l, v2.l
+// W64: v_cmp_nlt_f16_e64 vcc, v128.l, v2.l     ; encoding: [0x6a,0x00,0x0e,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc, vcc_hi, v255.h
+// W64: v_cmp_nlt_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0e,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc, vcc_hi, v255.l
+// W64: v_cmp_nlt_f16_e64 vcc, vcc_hi, v255.l   ; encoding: [0x6a,0x00,0x0e,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc, vcc_lo, v255.h
+// W64: v_cmp_nlt_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0e,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc, vcc_lo, v255.l
+// W64: v_cmp_nlt_f16_e64 vcc, vcc_lo, v255.l   ; encoding: [0x6a,0x00,0x0e,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc_lo, v1.h, v255.h
+// W32: v_cmp_nlt_f16_e64 vcc_lo, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0e,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc_lo, v1.l, v255.l
+// W32: v_cmp_nlt_f16_e64 vcc_lo, v1.l, v255.l  ; encoding: [0x6a,0x00,0x0e,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc_lo, v127.h, v255.h
+// W32: v_cmp_nlt_f16_e64 vcc_lo, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0e,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc_lo, v127.l, v255.l
+// W32: v_cmp_nlt_f16_e64 vcc_lo, v127.l, v255.l ; encoding: [0x6a,0x00,0x0e,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc_lo, v128.h, v2.h
+// W32: v_cmp_nlt_f16_e64 vcc_lo, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x0e,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc_lo, v128.l, v2.l
+// W32: v_cmp_nlt_f16_e64 vcc_lo, v128.l, v2.l  ; encoding: [0x6a,0x00,0x0e,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc_lo, vcc_hi, v255.h
+// W32: v_cmp_nlt_f16_e64 vcc_lo, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0e,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc_lo, vcc_hi, v255.l
+// W32: v_cmp_nlt_f16_e64 vcc_lo, vcc_hi, v255.l ; encoding: [0x6a,0x00,0x0e,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc_lo, vcc_lo, v255.h
+// W32: v_cmp_nlt_f16_e64 vcc_lo, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x0e,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc_lo, vcc_lo, v255.l
+// W32: v_cmp_nlt_f16_e64 vcc_lo, vcc_lo, v255.l ; encoding: [0x6a,0x00,0x0e,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc, v1.h, v255.h
+// W64: v_cmp_o_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x07,0xd4,0x01,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_o_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_o_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_o_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_o_f16 vcc, v127, v255
-// W64: v_cmp_o_f16_e64 vcc, v127, v255         ; encoding: [0x6a,0x00,0x07,0xd4,0x7f,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_o_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_o_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_o_f16 vcc, v1.l, v255.l
+// W64: v_cmp_o_f16_e64 vcc, v1.l, v255.l       ; encoding: [0x6a,0x00,0x07,0xd4,0x01,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_o_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_o_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_o_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_o_f16 vcc, v128, v2
-// W64: v_cmp_o_f16_e64 vcc, v128, v2           ; encoding: [0x6a,0x00,0x07,0xd4,0x80,0x05,0x02,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_o_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_o_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_o_f16 vcc, v127.h, v255.h
+// W64: v_cmp_o_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x07,0xd4,0x7f,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_o_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_o_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_o_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_o_f16 vcc, vcc_hi, v255
-// W64: v_cmp_o_f16_e64 vcc, vcc_hi, v255       ; encoding: [0x6a,0x00,0x07,0xd4,0x6b,0xfe,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_o_f16 vcc, vcc_lo, v255
-// W64: v_cmp_o_f16_e64 vcc, vcc_lo, v255       ; encoding: [0x6a,0x00,0x07,0xd4,0x6a,0xfe,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16 vcc, v127.l, v255.l
+// W64: v_cmp_o_f16_e64 vcc, v127.l, v255.l     ; encoding: [0x6a,0x00,0x07,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_o_f16 vcc_lo, v1, v255
-// W32: v_cmp_o_f16_e64 vcc_lo, v1, v255        ; encoding: [0x6a,0x00,0x07,0xd4,0x01,0xff,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_o_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_o_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_o_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc, v128.h, v2.h
+// W64: v_cmp_o_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x07,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x07,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x07,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc, v128.l, v2.l
+// W64: v_cmp_o_f16_e64 vcc, v128.l, v2.l       ; encoding: [0x6a,0x00,0x07,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc, vcc_hi, v255.h
+// W64: v_cmp_o_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x07,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc, vcc_hi, v255.l
+// W64: v_cmp_o_f16_e64 vcc, vcc_hi, v255.l     ; encoding: [0x6a,0x00,0x07,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc, vcc_lo, v255.h
+// W64: v_cmp_o_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x07,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc, vcc_lo, v255.l
+// W64: v_cmp_o_f16_e64 vcc, vcc_lo, v255.l     ; encoding: [0x6a,0x00,0x07,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc_lo, v1.h, v255.h
+// W32: v_cmp_o_f16_e64 vcc_lo, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x07,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp vcc_lo, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp vcc_lo, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc_lo, v1.l, v255.l
+// W32: v_cmp_o_f16_e64 vcc_lo, v1.l, v255.l    ; encoding: [0x6a,0x00,0x07,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc_lo, v127.h, v255.h
+// W32: v_cmp_o_f16_e64 vcc_lo, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x07,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp vcc_lo, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp vcc_lo, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc_lo, v127.l, v255.l
+// W32: v_cmp_o_f16_e64 vcc_lo, v127.l, v255.l  ; encoding: [0x6a,0x00,0x07,0xd4,0x7f,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_o_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_o_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_o_f16 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_o_f16 vcc_lo, v127, v255
-// W32: v_cmp_o_f16_e64 vcc_lo, v127, v255      ; encoding: [0x6a,0x00,0x07,0xd4,0x7f,0xff,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_o_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_o_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_o_f16 vcc_lo, v128.h, v2.h
+// W32: v_cmp_o_f16_e64 vcc_lo, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x07,0xd4,0x80,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_o_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_o_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_o_f16 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp vcc_lo, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x07,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_o_f16 vcc_lo, v128, v2
-// W32: v_cmp_o_f16_e64 vcc_lo, v128, v2        ; encoding: [0x6a,0x00,0x07,0xd4,0x80,0x05,0x02,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp vcc_lo, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x07,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_o_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_o_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_o_f16 vcc_lo, v128.l, v2.l
+// W32: v_cmp_o_f16_e64 vcc_lo, v128.l, v2.l    ; encoding: [0x6a,0x00,0x07,0xd4,0x80,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_o_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_o_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_o_f16 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_o_f16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_o_f16_e64 vcc_lo, vcc_hi, v255    ; encoding: [0x6a,0x00,0x07,0xd4,0x6b,0xfe,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_o_f16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_o_f16_e64 vcc_lo, vcc_lo, v255    ; encoding: [0x6a,0x00,0x07,0xd4,0x6a,0xfe,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16 vcc_lo, vcc_hi, v255.h
+// W32: v_cmp_o_f16_e64 vcc_lo, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x07,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_u_f16 vcc, v1, v255
-// W64: v_cmp_u_f16_e64 vcc, v1, v255           ; encoding: [0x6a,0x00,0x08,0xd4,0x01,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_o_f16 vcc_lo, vcc_hi, v255.l
+// W32: v_cmp_o_f16_e64 vcc_lo, vcc_hi, v255.l  ; encoding: [0x6a,0x00,0x07,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc_lo, vcc_lo, v255.h
+// W32: v_cmp_o_f16_e64 vcc_lo, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x07,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_u_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_u_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_o_f16 vcc_lo, vcc_lo, v255.l
+// W32: v_cmp_o_f16_e64 vcc_lo, vcc_lo, v255.l  ; encoding: [0x6a,0x00,0x07,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_u_f16 vcc, v1.h, v255.h
+// W64: v_cmp_u_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x08,0xd4,0x01,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_u_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_u_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_u_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_u_f16 vcc, v127, v255
-// W64: v_cmp_u_f16_e64 vcc, v127, v255         ; encoding: [0x6a,0x00,0x08,0xd4,0x7f,0xff,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_u_f16 vcc, v1.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_u_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_u_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_u_f16 vcc, v1.l, v255.l
+// W64: v_cmp_u_f16_e64 vcc, v1.l, v255.l       ; encoding: [0x6a,0x00,0x08,0xd4,0x01,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_u_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_u_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_u_f16 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_u_f16 vcc, v128, v2
-// W64: v_cmp_u_f16_e64 vcc, v128, v2           ; encoding: [0x6a,0x00,0x08,0xd4,0x80,0x05,0x02,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_u_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_u_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_u_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_u_f16 vcc, v127.h, v255.h
+// W64: v_cmp_u_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x08,0xd4,0x7f,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_u_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_u_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_u_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 // W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_u_f16 vcc, vcc_hi, v255
-// W64: v_cmp_u_f16_e64 vcc, vcc_hi, v255       ; encoding: [0x6a,0x00,0x08,0xd4,0x6b,0xfe,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_u_f16 vcc, v127.h, v255.h quad_perm:[3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_u_f16 vcc, vcc_lo, v255
-// W64: v_cmp_u_f16_e64 vcc, vcc_lo, v255       ; encoding: [0x6a,0x00,0x08,0xd4,0x6a,0xfe,0x03,0x00]
-// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_u_f16 vcc, v127.l, v255.l
+// W64: v_cmp_u_f16_e64 vcc, v127.l, v255.l     ; encoding: [0x6a,0x00,0x08,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_u_f16 vcc_lo, v1, v255
-// W32: v_cmp_u_f16_e64 vcc_lo, v1, v255        ; encoding: [0x6a,0x00,0x08,0xd4,0x01,0xff,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_u_f16 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_u_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_u_f16 vcc, v128.h, v2.h
+// W64: v_cmp_u_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x08,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_u_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x08,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_u_f16 vcc, v128.h, v2.h quad_perm:[3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x08,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_u_f16 vcc, v128.l, v2.l
+// W64: v_cmp_u_f16_e64 vcc, v128.l, v2.l       ; encoding: [0x6a,0x00,0x08,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_u_f16 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_u_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_u_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+v_cmp_u_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_u_f16 vcc, vcc_hi, v255.h
+// W64: v_cmp_u_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x08,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_u_f16 vcc, vcc_hi, v255.l
+// W64: v_cmp_u_f16_e64 vcc, vcc_hi, v255.l     ; encoding: [0x6a,0x00,0x08,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_u_f16 vcc, vcc_lo, v255.h
+// W64: v_cmp_u_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x08,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_u_f16 vcc, vcc_lo, v255.l
+// W64: v_cmp_u_f16_e64 vcc, vcc_lo, v255.l     ; encoding: [0x6a,0x00,0x08,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_u_f16 vcc_lo, v1.h, v255.h
+// W32: v_cmp_u_f16_e64 vcc_lo, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x08,0xd4,0x01,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_u_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_u_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_u_f16 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp vcc_lo, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_u_f16 vcc_lo, v127, v255
-// W32: v_cmp_u_f16_e64 vcc_lo, v127, v255      ; encoding: [0x6a,0x00,0x08,0xd4,0x7f,0xff,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_u_f16 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp vcc_lo, v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_u_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_u_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+v_cmp_u_f16 vcc_lo, v1.l, v255.l
+// W32: v_cmp_u_f16_e64 vcc_lo, v1.l, v255.l    ; encoding: [0x6a,0x00,0x08,0xd4,0x01,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_u_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_u_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+v_cmp_u_f16 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_u_f16 vcc_lo, v128, v2
-// W32: v_cmp_u_f16_e64 vcc_lo, v128, v2        ; encoding: [0x6a,0x00,0x08,0xd4,0x80,0x05,0x02,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_u_f16 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_u_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_u_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+v_cmp_u_f16 vcc_lo, v127.h, v255.h
+// W32: v_cmp_u_f16_e64 vcc_lo, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x08,0xd4,0x7f,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_u_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_u_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+v_cmp_u_f16 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp vcc_lo, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 // W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_u_f16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_u_f16_e64 vcc_lo, vcc_hi, v255    ; encoding: [0x6a,0x00,0x08,0xd4,0x6b,0xfe,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_u_f16 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp vcc_lo, v127.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_u_f16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_u_f16_e64 vcc_lo, vcc_lo, v255    ; encoding: [0x6a,0x00,0x08,0xd4,0x6a,0xfe,0x03,0x00]
-// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+v_cmp_u_f16 vcc_lo, v127.l, v255.l
+// W32: v_cmp_u_f16_e64 vcc_lo, v127.l, v255.l  ; encoding: [0x6a,0x00,0x08,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_u_f16 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_u_f16 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_u_f16 vcc_lo, v128.h, v2.h
+// W32: v_cmp_u_f16_e64 vcc_lo, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x08,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_u_f16 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp vcc_lo, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x08,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_u_f16 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp vcc_lo, v128.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x18,0x08,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_u_f16 vcc_lo, v128.l, v2.l
+// W32: v_cmp_u_f16_e64 vcc_lo, v128.l, v2.l    ; encoding: [0x6a,0x00,0x08,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_u_f16 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_u_f16 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_u_f16 vcc_lo, vcc_hi, v255.h
+// W32: v_cmp_u_f16_e64 vcc_lo, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x08,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_u_f16 vcc_lo, vcc_hi, v255.l
+// W32: v_cmp_u_f16_e64 vcc_lo, vcc_hi, v255.l  ; encoding: [0x6a,0x00,0x08,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_u_f16 vcc_lo, vcc_lo, v255.h
+// W32: v_cmp_u_f16_e64 vcc_lo, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x08,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_u_f16 vcc_lo, vcc_lo, v255.l
+// W32: v_cmp_u_f16_e64 vcc_lo, vcc_lo, v255.l  ; encoding: [0x6a,0x00,0x08,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vopc.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vopc.txt
index bca59aa19a7d8..ea5ad443533f6 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vopc.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vopc.txt
@@ -5,59 +5,100 @@
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64,W64-FAKE16 %s
 
 0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_class_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_class_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_class_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_class_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_class_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_class_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_class_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_class_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_class_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_class_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_class_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_class_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_class_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_class_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
-# W32: v_cmp_class_f16_e64_dpp vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_class_f16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_class_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_class_f16_e64_dpp vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
 
 0x7a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
-# W32: v_cmp_class_f16_e64_dpp ttmp14, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
-# W64: v_cmp_class_f16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_class_f16_e64_dpp ttmp14, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp ttmp14, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_class_f16_e64_dpp ttmp[14:15], v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
 
 0x7c,0x01,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x0d,0x30
-# GFX11: v_cmp_class_f16_e64_dpp null, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp null, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp null, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x08,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_class_f16_e64_dpp ttmp14, v1.h, v2.l op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x08,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp ttmp14, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_class_f16_e64_dpp ttmp[14:15], v1.h, v2.l op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x08,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+
+0x7c,0x11,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.h op_sel:[0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x11,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp null, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.h op_sel:[0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x11,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp null, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_class_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -115,59 +156,100 @@
 # GFX11: v_cmp_class_f32_e64_dpp null, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x01,0x7e,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_eq_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_eq_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_eq_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_eq_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x01,0x02,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# W32: v_cmp_eq_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x02,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_eq_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x02,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x02,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x02,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x02,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x02,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 
 0x7a,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# W32: v_cmp_eq_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-# W64: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp ttmp14, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
 
 0x7c,0x83,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX11: v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x0a,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_eq_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0x7c,0x93,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_eq_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -527,59 +609,100 @@
 # GFX11: v_cmp_eq_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x00,0x4a,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_f_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_f_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_f_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_f_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_f_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_f_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_f_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_f_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_f_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_f_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_f_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_f_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_f_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_f_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_f_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_f_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_f_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_f_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_f_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_f_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_f_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_f_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_f_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_f_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_f_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_f_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_f_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_f_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_f_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_f_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_f_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_f_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_f_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_f_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_f_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_f_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x01,0x00,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# W32: v_cmp_f_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x00,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_f_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x00,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_f_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x00,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_f_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x00,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_f_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x00,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_f_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x00,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 
 0x7a,0x02,0x00,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# W32: v_cmp_f_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x00,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-# W64: v_cmp_f_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x00,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_f_f16_e64_dpp ttmp14, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x00,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_f_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x00,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_f_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x00,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_f_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x00,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
 
 0x7c,0x83,0x00,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX11: v_cmp_f_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x00,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_f_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x00,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_f_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x00,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_f_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x00,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_f_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x00,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x0a,0x00,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_f_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x00,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_f_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x00,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_f_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x00,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_f_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x00,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0x7c,0x93,0x00,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_f_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x00,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_f_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x00,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_f_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x00,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_f_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x00,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x10,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_f_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x10,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -747,59 +870,100 @@
 # GFX11: v_cmp_f_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x00,0x48,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_ge_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_ge_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_ge_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_ge_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x01,0x06,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# W32: v_cmp_ge_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x06,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_ge_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x06,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x06,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x06,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x06,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x06,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 
 0x7a,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# W32: v_cmp_ge_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-# W64: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp ttmp14, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
 
 0x7c,0x83,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX11: v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x0a,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_ge_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0x7c,0x93,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_ge_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -1159,59 +1323,100 @@
 # GFX11: v_cmp_ge_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x00,0x4e,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_gt_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_gt_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_gt_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_gt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x01,0x04,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# W32: v_cmp_gt_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x04,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_gt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x04,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x04,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x04,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x04,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x04,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 
 0x7a,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# W32: v_cmp_gt_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-# W64: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp ttmp14, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
 
 0x7c,0x83,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX11: v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x0a,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_gt_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0x7c,0x93,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_gt_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -1571,59 +1776,100 @@
 # GFX11: v_cmp_gt_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x00,0x4c,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_le_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_le_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_le_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_le_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_le_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_le_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_le_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_le_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_le_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_le_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_le_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_le_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_le_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_le_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x01,0x03,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# W32: v_cmp_le_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x03,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_le_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x03,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_le_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x03,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x03,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_le_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x03,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x03,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 
 0x7a,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# W32: v_cmp_le_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-# W64: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_le_f16_e64_dpp ttmp14, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
 
 0x7c,0x83,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX11: v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x0a,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_le_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0x7c,0x93,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_le_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -1983,59 +2229,100 @@
 # GFX11: v_cmp_le_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x00,0x4b,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_lg_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_lg_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_lg_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_lg_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x01,0x05,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# W32: v_cmp_lg_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x05,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_lg_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x05,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x05,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x05,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x05,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x05,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 
 0x7a,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# W32: v_cmp_lg_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-# W64: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp ttmp14, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
 
 0x7c,0x83,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX11: v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x0a,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_lg_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0x7c,0x93,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_lg_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -2848,59 +3135,100 @@
 # GFX11: v_cmp_ne_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x00,0x4d,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_neq_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_neq_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_neq_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_neq_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x01,0x0d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# W32: v_cmp_neq_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_neq_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 
 0x7a,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# W32: v_cmp_neq_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-# W64: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp ttmp14, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
 
 0x7c,0x83,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX11: v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x0a,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_neq_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0x7c,0x93,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_neq_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -2958,59 +3286,100 @@
 # GFX11: v_cmp_neq_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x1d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_nge_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_nge_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_nge_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_nge_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x01,0x09,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# W32: v_cmp_nge_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x09,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_nge_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x09,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x09,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x09,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x09,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x09,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 
 0x7a,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# W32: v_cmp_nge_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-# W64: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp ttmp14, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
 
 0x7c,0x83,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX11: v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x0a,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_nge_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0x7c,0x93,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_nge_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -3068,59 +3437,100 @@
 # GFX11: v_cmp_nge_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x19,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_ngt_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_ngt_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_ngt_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_ngt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x01,0x0b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# W32: v_cmp_ngt_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_ngt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 
 0x7a,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# W32: v_cmp_ngt_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-# W64: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp ttmp14, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
 
 0x7c,0x83,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX11: v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x0a,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0x7c,0x93,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_ngt_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -3178,59 +3588,100 @@
 # GFX11: v_cmp_ngt_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x1b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_nle_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_nle_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_nle_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_nle_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x01,0x0c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# W32: v_cmp_nle_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_nle_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 
 0x7a,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# W32: v_cmp_nle_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-# W64: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp ttmp14, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
 
 0x7c,0x83,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX11: v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x0a,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_nle_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0x7c,0x93,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_nle_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -3288,59 +3739,100 @@
 # GFX11: v_cmp_nle_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x1c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_nlg_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_nlg_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_nlg_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_nlg_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x01,0x0a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# W32: v_cmp_nlg_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_nlg_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 
 0x7a,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# W32: v_cmp_nlg_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-# W64: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp ttmp14, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
 
 0x7c,0x83,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX11: v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x0a,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0x7c,0x93,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_nlg_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -3398,59 +3890,100 @@
 # GFX11: v_cmp_nlg_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x1a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_nlt_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_nlt_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_nlt_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_nlt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x01,0x0e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# W32: v_cmp_nlt_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_nlt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 
 0x7a,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# W32: v_cmp_nlt_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-# W64: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp ttmp14, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
 
 0x7c,0x83,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX11: v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x0a,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0x7c,0x93,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_nlt_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -3508,59 +4041,100 @@
 # GFX11: v_cmp_nlt_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x1e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_o_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_o_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_o_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_o_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_o_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_o_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_o_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_o_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_o_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_o_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_o_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_o_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_o_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_o_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x01,0x07,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# W32: v_cmp_o_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x07,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_o_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x07,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_o_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x07,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x07,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_o_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x07,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x07,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 
 0x7a,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# W32: v_cmp_o_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-# W64: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_o_f16_e64_dpp ttmp14, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
 
 0x7c,0x83,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX11: v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x0a,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_o_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0x7c,0x93,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_o_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -3618,59 +4192,100 @@
 # GFX11: v_cmp_o_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x17,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_t_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_t_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_t_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_t_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_t_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_t_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_t_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_t_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_t_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_t_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_t_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_t_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_t_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_t_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_t_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_t_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_t_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_t_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_t_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_t_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_t_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_t_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_t_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_t_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_t_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_t_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_t_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_t_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_t_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_t_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_t_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_t_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_t_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_t_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_t_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_t_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x01,0x0f,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# W32: v_cmp_t_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0f,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_t_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0f,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_t_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0f,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_t_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0f,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_t_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0f,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_t_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0f,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 
 0x7a,0x02,0x0f,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# W32: v_cmp_t_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0f,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-# W64: v_cmp_t_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0f,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_t_f16_e64_dpp ttmp14, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0f,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_t_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0f,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_t_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0f,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_t_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0f,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
 
 0x7c,0x83,0x0f,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX11: v_cmp_t_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0f,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_t_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0f,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_t_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0f,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_t_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0f,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_t_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0f,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x0a,0x0f,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_t_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x0f,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_t_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0f,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_t_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x0f,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_t_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0f,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0x7c,0x93,0x0f,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_t_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x0f,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_t_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0f,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_t_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x0f,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_t_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0f,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x1f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_t_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -3838,59 +4453,100 @@
 # GFX11: v_cmp_t_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x00,0x4f,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_u_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_u_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_u_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_u_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_u_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_u_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_u_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_u_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_u_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_u_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_u_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_u_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_u_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_u_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x01,0x08,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# W32: v_cmp_u_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x08,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_u_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x08,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_u_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x08,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x08,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_u_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x08,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x08,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 
 0x7a,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# W32: v_cmp_u_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-# W64: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_u_f16_e64_dpp ttmp14, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
 
 0x7c,0x83,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX11: v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x0a,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_u_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0x7c,0x93,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_u_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vopc.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vopc.txt
index 8551e018b8300..ae96905785cf6 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vopc.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vopc.txt
@@ -5,23 +5,46 @@
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64,W64-FAKE16 %s
 
 0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_class_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_class_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_class_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_class_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_class_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_class_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_class_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_class_f16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_class_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_class_f16_e64_dpp vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x7a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_class_f16_e64_dpp ttmp14, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_class_f16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_class_f16_e64_dpp ttmp14, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp ttmp14, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_class_f16_e64_dpp ttmp[14:15], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x7c,0x01,0x7d,0xd4,0xea,0xfe,0x03,0x20,0xff,0x00,0x00,0x00
-# GFX11: v_cmp_class_f16_e64_dpp null, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xea,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xea,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp null, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xea,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xea,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp null, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xea,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+
+0x7a,0x08,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_class_f16_e64_dpp ttmp14, v1.h, v2.l op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x08,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp ttmp14, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_class_f16_e64_dpp ttmp[14:15], v1.h, v2.l op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x08,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+0x7c,0x11,0x7d,0xd4,0xea,0xfe,0x03,0x20,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.h op_sel:[0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x11,0x7d,0xd4,0xea,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp null, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xea,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.h op_sel:[0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x11,0x7d,0xd4,0xea,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp null, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xea,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_class_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -43,23 +66,46 @@
 # GFX11: v_cmp_class_f32_e64_dpp null, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x01,0x7e,0xd4,0xea,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_eq_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_eq_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_eq_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x01,0x02,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# W32: v_cmp_eq_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x02,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-# W64: v_cmp_eq_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x02,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x02,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x02,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x02,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x02,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 
 0x7a,0x02,0x02,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# W32: v_cmp_eq_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x02,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-# W64: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x02,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x02,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x02,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x02,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x02,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 
 0x7c,0x83,0x02,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX11: v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x7a,0x0a,0x02,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_eq_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x02,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x02,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x02,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x02,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0x7c,0x93,0x02,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x02,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x02,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x12,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_eq_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x12,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -203,23 +249,46 @@
 # GFX11: v_cmp_eq_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x00,0x4a,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_f_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_f_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_f_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_f_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_f_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_f_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_f_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_f_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_f_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_f_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_f_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x01,0x00,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# W32: v_cmp_f_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x00,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-# W64: v_cmp_f_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x00,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_f_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x00,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_f_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x00,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_f_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x00,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_f_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x00,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 
 0x7a,0x02,0x00,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# W32: v_cmp_f_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x00,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-# W64: v_cmp_f_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x00,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_f_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x00,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_f_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x00,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_f_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x00,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_f_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x00,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 
 0x7c,0x83,0x00,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX11: v_cmp_f_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x00,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_f_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x00,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_f_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x00,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_f_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x00,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_f_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x00,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x7a,0x0a,0x00,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_f_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x00,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_f_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x00,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_f_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x00,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_f_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x00,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0x7c,0x93,0x00,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_f_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x00,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_f_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x00,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_f_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x00,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_f_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x00,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x10,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_f_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x10,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -279,23 +348,46 @@
 # GFX11: v_cmp_f_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x00,0x48,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_ge_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_ge_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_ge_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x01,0x06,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# W32: v_cmp_ge_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x06,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-# W64: v_cmp_ge_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x06,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x06,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x06,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x06,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x06,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 
 0x7a,0x02,0x06,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# W32: v_cmp_ge_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x06,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-# W64: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x06,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x06,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x06,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x06,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x06,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 
 0x7c,0x83,0x06,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX11: v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x7a,0x0a,0x06,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_ge_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x06,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x06,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x06,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x06,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0x7c,0x93,0x06,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x06,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x06,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x16,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_ge_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x16,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -439,23 +531,46 @@
 # GFX11: v_cmp_ge_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x00,0x4e,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_gt_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_gt_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_gt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x01,0x04,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# W32: v_cmp_gt_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x04,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-# W64: v_cmp_gt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x04,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x04,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x04,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x04,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x04,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 
 0x7a,0x02,0x04,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# W32: v_cmp_gt_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x04,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-# W64: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x04,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x04,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x04,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x04,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x04,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 
 0x7c,0x83,0x04,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX11: v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x7a,0x0a,0x04,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_gt_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x04,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x04,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x04,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x04,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0x7c,0x93,0x04,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x04,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x04,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x14,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_gt_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x14,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -599,23 +714,46 @@
 # GFX11: v_cmp_gt_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x00,0x4c,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_le_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_le_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_le_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_le_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_le_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_le_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x01,0x03,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# W32: v_cmp_le_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x03,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-# W64: v_cmp_le_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x03,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_le_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x03,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x03,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_le_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x03,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x03,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 
 0x7a,0x02,0x03,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# W32: v_cmp_le_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x03,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-# W64: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x03,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_le_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x03,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x03,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x03,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x03,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 
 0x7c,0x83,0x03,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX11: v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x7a,0x0a,0x03,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_le_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x03,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x03,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x03,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x03,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0x7c,0x93,0x03,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x03,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x03,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x13,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_le_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x13,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -759,23 +897,46 @@
 # GFX11: v_cmp_le_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x00,0x4b,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_lg_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_lg_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_lg_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x01,0x05,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# W32: v_cmp_lg_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x05,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-# W64: v_cmp_lg_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x05,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x05,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x05,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x05,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x05,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 
 0x7a,0x02,0x05,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# W32: v_cmp_lg_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x05,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-# W64: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x05,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x05,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x05,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x05,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x05,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 
 0x7c,0x83,0x05,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX11: v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x7a,0x0a,0x05,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_lg_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x05,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x05,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x05,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x05,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0x7c,0x93,0x05,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x05,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x05,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x15,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_lg_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x15,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -1102,23 +1263,46 @@
 # GFX11: v_cmp_ne_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x00,0x4d,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_neq_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_neq_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_neq_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x01,0x0d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# W32: v_cmp_neq_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-# W64: v_cmp_neq_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 
 0x7a,0x02,0x0d,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# W32: v_cmp_neq_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0d,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-# W64: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0d,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0d,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0d,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0d,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0d,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 
 0x7c,0x83,0x0d,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX11: v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x7a,0x0a,0x0d,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_neq_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x0d,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0d,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x0d,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0d,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0x7c,0x93,0x0d,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x0d,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x0d,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x1d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_neq_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -1140,23 +1324,46 @@
 # GFX11: v_cmp_neq_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x1d,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_nge_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_nge_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nge_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x01,0x09,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# W32: v_cmp_nge_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x09,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nge_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x09,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x09,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x09,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x09,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x09,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 
 0x7a,0x02,0x09,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# W32: v_cmp_nge_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x09,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x09,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x09,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x09,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x09,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x09,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 
 0x7c,0x83,0x09,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX11: v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x7a,0x0a,0x09,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_nge_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x09,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x09,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x09,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x09,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0x7c,0x93,0x09,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x09,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x09,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x19,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_nge_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x19,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -1178,23 +1385,46 @@
 # GFX11: v_cmp_nge_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x19,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_ngt_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_ngt_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_ngt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x01,0x0b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# W32: v_cmp_ngt_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-# W64: v_cmp_ngt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 
 0x7a,0x02,0x0b,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# W32: v_cmp_ngt_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0b,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-# W64: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0b,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0b,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0b,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0b,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0b,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 
 0x7c,0x83,0x0b,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX11: v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x7a,0x0a,0x0b,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x0b,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0b,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x0b,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0b,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0x7c,0x93,0x0b,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x0b,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x0b,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x1b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_ngt_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -1216,23 +1446,46 @@
 # GFX11: v_cmp_ngt_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x1b,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_nle_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_nle_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nle_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x01,0x0c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# W32: v_cmp_nle_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nle_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 
 0x7a,0x02,0x0c,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# W32: v_cmp_nle_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0c,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0c,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0c,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0c,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0c,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0c,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 
 0x7c,0x83,0x0c,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX11: v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x7a,0x0a,0x0c,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_nle_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x0c,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0c,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x0c,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0c,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0x7c,0x93,0x0c,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x0c,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x0c,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x1c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_nle_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -1254,23 +1507,46 @@
 # GFX11: v_cmp_nle_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x1c,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_nlg_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_nlg_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nlg_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x01,0x0a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# W32: v_cmp_nlg_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nlg_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 
 0x7a,0x02,0x0a,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# W32: v_cmp_nlg_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0a,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0a,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0a,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0a,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0a,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0a,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 
 0x7c,0x83,0x0a,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX11: v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x7a,0x0a,0x0a,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x0a,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0a,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x0a,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0a,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0x7c,0x93,0x0a,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x0a,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x0a,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x1a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_nlg_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -1292,23 +1568,46 @@
 # GFX11: v_cmp_nlg_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x1a,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_nlt_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_nlt_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nlt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x01,0x0e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# W32: v_cmp_nlt_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nlt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 
 0x7a,0x02,0x0e,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# W32: v_cmp_nlt_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0e,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0e,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0e,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0e,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0e,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0e,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 
 0x7c,0x83,0x0e,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX11: v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x7a,0x0a,0x0e,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x0e,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0e,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x0e,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0e,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0x7c,0x93,0x0e,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x0e,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x0e,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x1e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_nlt_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -1330,23 +1629,46 @@
 # GFX11: v_cmp_nlt_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x1e,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_o_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_o_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_o_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_o_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_o_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_o_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x01,0x07,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# W32: v_cmp_o_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x07,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-# W64: v_cmp_o_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x07,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_o_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x07,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x07,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_o_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x07,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x07,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 
 0x7a,0x02,0x07,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# W32: v_cmp_o_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x07,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-# W64: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x07,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_o_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x07,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x07,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x07,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x07,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 
 0x7c,0x83,0x07,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX11: v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x7a,0x0a,0x07,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_o_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x07,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x07,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x07,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x07,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0x7c,0x93,0x07,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x07,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x07,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x17,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_o_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x17,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -1368,23 +1690,46 @@
 # GFX11: v_cmp_o_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x17,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_t_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_t_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_t_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_t_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_t_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_t_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_t_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_t_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_t_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_t_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_t_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x01,0x0f,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# W32: v_cmp_t_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0f,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-# W64: v_cmp_t_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0f,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_t_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0f,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_t_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0f,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_t_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0f,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_t_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0f,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 
 0x7a,0x02,0x0f,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# W32: v_cmp_t_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0f,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-# W64: v_cmp_t_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0f,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_t_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0f,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_t_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0f,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_t_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0f,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_t_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0f,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 
 0x7c,0x83,0x0f,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX11: v_cmp_t_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0f,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_t_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0f,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_t_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0f,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_t_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0f,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_t_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0f,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x7a,0x0a,0x0f,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_t_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x0f,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_t_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0f,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_t_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x0f,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_t_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0f,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0x7c,0x93,0x0f,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_t_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x0f,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_t_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0f,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_t_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x0f,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_t_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0f,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x1f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_t_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1f,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -1444,23 +1789,46 @@
 # GFX11: v_cmp_t_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x00,0x4f,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_u_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_u_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_u_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_u_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_u_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_u_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x01,0x08,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# W32: v_cmp_u_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x08,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-# W64: v_cmp_u_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x08,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_u_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x08,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x08,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_u_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x08,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x08,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 
 0x7a,0x02,0x08,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# W32: v_cmp_u_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x08,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-# W64: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x08,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_u_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x08,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x08,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x08,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x08,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 
 0x7c,0x83,0x08,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX11: v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x7a,0x0a,0x08,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_u_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x08,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x08,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x08,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x08,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0x7c,0x93,0x08,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x08,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x08,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x18,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_u_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x18,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vopc.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vopc.txt
index 8b9b0a819676f..987ed9aaf9e45 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vopc.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vopc.txt
@@ -5,24 +5,34 @@
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64,W64-FAKE16 %s
 
 0x0a,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_class_f16_e64 s10, v1, v2         ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_class_f16_e64 s[10:11], v1, v2    ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_class_f16_e64 s10, v1.l, v2.l     ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_class_f16_e64 s10, v1, v2         ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_class_f16_e64 s[10:11], v1.l, v2.l ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_class_f16_e64 s[10:11], v1, v2    ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x7d,0xd4,0x01,0xe1,0x01,0x00
-# W32: v_cmp_class_f16_e64 s10, v1, 0.5        ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0xe1,0x01,0x00]
-# W64: v_cmp_class_f16_e64 s[10:11], v1, 0.5   ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0xe1,0x01,0x00]
+# W32-REAL16: v_cmp_class_f16_e64 s10, v1.l, 0.5      ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0xe1,0x01,0x00]
+# W32-FAKE16: v_cmp_class_f16_e64 s10, v1, 0.5        ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0xe1,0x01,0x00]
+# W64-REAL16: v_cmp_class_f16_e64 s[10:11], v1.l, 0.5 ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0xe1,0x01,0x00]
+# W64-FAKE16: v_cmp_class_f16_e64 s[10:11], v1, 0.5   ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0xe1,0x01,0x00]
 
 0x0a,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00
-# W32: v_cmp_class_f16_e64 s10, v255, v2       ; encoding: [0x0a,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00]
-# W64: v_cmp_class_f16_e64 s[10:11], v255, v2  ; encoding: [0x0a,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_class_f16_e64 s10, v255.l, v2.l   ; encoding: [0x0a,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_class_f16_e64 s10, v255, v2       ; encoding: [0x0a,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_class_f16_e64 s[10:11], v255.l, v2.l ; encoding: [0x0a,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_class_f16_e64 s[10:11], v255, v2  ; encoding: [0x0a,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00]
 
 0x0a,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00
-# W32: v_cmp_class_f16_e64 s10, s1, v2         ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00]
-# W64: v_cmp_class_f16_e64 s[10:11], s1, v2    ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00]
+# W32-REAL16: v_cmp_class_f16_e64 s10, s1, v2.l       ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00]
+# W32-FAKE16: v_cmp_class_f16_e64 s10, s1, v2         ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00]
+# W64-REAL16: v_cmp_class_f16_e64 s[10:11], s1, v2.l  ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00]
+# W64-FAKE16: v_cmp_class_f16_e64 s[10:11], s1, v2    ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00]
 
 0x0a,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00
-# W32: v_cmp_class_f16_e64 s10, s105, v255     ; encoding: [0x0a,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00]
-# W64: v_cmp_class_f16_e64 s[10:11], s105, v255 ; encoding: [0x0a,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+# W32-REAL16: v_cmp_class_f16_e64 s10, s105, v255.l   ; encoding: [0x0a,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+# W32-FAKE16: v_cmp_class_f16_e64 s10, s105, v255     ; encoding: [0x0a,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+# W64-REAL16: v_cmp_class_f16_e64 s[10:11], s105, v255.l ; encoding: [0x0a,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+# W64-FAKE16: v_cmp_class_f16_e64 s[10:11], s105, v255 ; encoding: [0x0a,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00]
 
 0x0a,0x00,0x7d,0xd4,0x6a,0x04,0x00,0x00
 # W32: v_cmp_class_f16_e64 s10, vcc_lo, s2     ; encoding: [0x0a,0x00,0x7d,0xd4,0x6a,0x04,0x00,0x00]
@@ -67,6 +77,18 @@
 0x7c,0x01,0x7d,0xd4,0xff,0xd6,0x00,0x20,0x0b,0xfe,0x00,0x00
 # GFX11: v_cmp_class_f16_e64 null, -|0xfe0b|, vcc_hi ; encoding: [0x7c,0x01,0x7d,0xd4,0xff,0xd6,0x00,0x20,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x7d,0xd4,0xff,0x05,0x02,0x00
+# W32-REAL16: v_cmp_class_f16_e64 s10, v255.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x7d,0xd4,0xff,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_class_f16_e64 s10, v255, v2       ; encoding: [0x0a,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_class_f16_e64 s[10:11], v255.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x7d,0xd4,0xff,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_class_f16_e64 s[10:11], v255, v2  ; encoding: [0x0a,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00]
+
+0x0a,0x10,0x7d,0xd4,0x69,0xfe,0x03,0x00
+# W32-REAL16: v_cmp_class_f16_e64 s10, s105, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+# W32-FAKE16: v_cmp_class_f16_e64 s10, s105, v255     ; encoding: [0x0a,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+# W64-REAL16: v_cmp_class_f16_e64 s[10:11], s105, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+# W64-FAKE16: v_cmp_class_f16_e64 s[10:11], s105, v255 ; encoding: [0x0a,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+
 0x0a,0x00,0x7e,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_class_f32_e64 s10, v1, v2         ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_class_f32_e64 s[10:11], v1, v2    ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0x05,0x02,0x00]
@@ -186,12 +208,16 @@
 # GFX11: v_cmp_class_f64_e64 null, 0xaf123456, 0xaf123456 ; encoding: [0x7c,0x00,0x7f,0xd4,0xff,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
 
 0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_eq_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_eq_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_eq_f16_e64 s10, v1.l, v2.l        ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_eq_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_eq_f16_e64 s[10:11], v1.l, v2.l   ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_eq_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x02,0xd4,0xff,0xff,0x03,0x00
-# W32: v_cmp_eq_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x02,0xd4,0xff,0xff,0x03,0x00]
-# W64: v_cmp_eq_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x02,0xd4,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cmp_eq_f16_e64 s10, v255.l, v255.l    ; encoding: [0x0a,0x00,0x02,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_eq_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x02,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_eq_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x02,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_eq_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x02,0xd4,0xff,0xff,0x03,0x00]
 
 0x0a,0x00,0x02,0xd4,0x01,0x04,0x00,0x00
 # W32: v_cmp_eq_f16_e64 s10, s1, s2            ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x04,0x00,0x00]
@@ -244,6 +270,18 @@
 0x7c,0x83,0x02,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
 # GFX11: v_cmp_eq_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x02,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x02,0xd4,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cmp_eq_f16_e64 s10, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x02,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_eq_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_eq_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x02,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_eq_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00]
+
+0x0a,0x10,0x02,0xd4,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cmp_eq_f16_e64 s10, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x02,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_eq_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x02,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_eq_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x02,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_eq_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x02,0xd4,0xff,0xff,0x03,0x00]
+
 0x0a,0x00,0x12,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_eq_f32_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_eq_f32_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0x05,0x02,0x00]
@@ -713,12 +751,16 @@
 # GFX11: v_cmp_eq_u64_e64 null, 0xaf123456, vcc  ; encoding: [0x7c,0x00,0x5a,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
 
 0x0a,0x00,0x00,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_f_f16_e64 s10, v1, v2             ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_f_f16_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_f_f16_e64 s10, v1.l, v2.l         ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_f_f16_e64 s10, v1, v2             ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_f_f16_e64 s[10:11], v1.l, v2.l    ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_f_f16_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x00,0xd4,0xff,0xff,0x03,0x00
-# W32: v_cmp_f_f16_e64 s10, v255, v255         ; encoding: [0x0a,0x00,0x00,0xd4,0xff,0xff,0x03,0x00]
-# W64: v_cmp_f_f16_e64 s[10:11], v255, v255    ; encoding: [0x0a,0x00,0x00,0xd4,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cmp_f_f16_e64 s10, v255.l, v255.l     ; encoding: [0x0a,0x00,0x00,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_f_f16_e64 s10, v255, v255         ; encoding: [0x0a,0x00,0x00,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_f_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x00,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_f_f16_e64 s[10:11], v255, v255    ; encoding: [0x0a,0x00,0x00,0xd4,0xff,0xff,0x03,0x00]
 
 0x0a,0x00,0x00,0xd4,0x01,0x04,0x00,0x00
 # W32: v_cmp_f_f16_e64 s10, s1, s2             ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0x04,0x00,0x00]
@@ -771,6 +813,18 @@
 0x7c,0x83,0x00,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
 # GFX11: v_cmp_f_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x00,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x00,0xd4,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cmp_f_f16_e64 s10, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x00,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_f_f16_e64 s10, v1, v2             ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_f_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x00,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_f_f16_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0x05,0x02,0x00]
+
+0x0a,0x10,0x00,0xd4,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cmp_f_f16_e64 s10, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x00,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_f_f16_e64 s10, v255, v255         ; encoding: [0x0a,0x00,0x00,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_f_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x00,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_f_f16_e64 s[10:11], v255, v255    ; encoding: [0x0a,0x00,0x00,0xd4,0xff,0xff,0x03,0x00]
+
 0x0a,0x00,0x10,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_f_f32_e64 s10, v1, v2             ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_f_f32_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0x05,0x02,0x00]
@@ -1090,12 +1144,16 @@
 # GFX11: v_cmp_f_u64_e64 null, 0xaf123456, vcc   ; encoding: [0x7c,0x00,0x58,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
 
 0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_ge_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_ge_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_ge_f16_e64 s10, v1.l, v2.l        ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_ge_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_ge_f16_e64 s[10:11], v1.l, v2.l   ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_ge_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x06,0xd4,0xff,0xff,0x03,0x00
-# W32: v_cmp_ge_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x06,0xd4,0xff,0xff,0x03,0x00]
-# W64: v_cmp_ge_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x06,0xd4,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cmp_ge_f16_e64 s10, v255.l, v255.l    ; encoding: [0x0a,0x00,0x06,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_ge_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x06,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_ge_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x06,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_ge_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x06,0xd4,0xff,0xff,0x03,0x00]
 
 0x0a,0x00,0x06,0xd4,0x01,0x04,0x00,0x00
 # W32: v_cmp_ge_f16_e64 s10, s1, s2            ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x04,0x00,0x00]
@@ -1148,6 +1206,18 @@
 0x7c,0x83,0x06,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
 # GFX11: v_cmp_ge_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x06,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x06,0xd4,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cmp_ge_f16_e64 s10, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x06,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_ge_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_ge_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x06,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_ge_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]
+
+0x0a,0x10,0x06,0xd4,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cmp_ge_f16_e64 s10, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x06,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_ge_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x06,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_ge_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x06,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_ge_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x06,0xd4,0xff,0xff,0x03,0x00]
+
 0x0a,0x00,0x16,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_ge_f32_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_ge_f32_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0x05,0x02,0x00]
@@ -1617,12 +1687,16 @@
 # GFX11: v_cmp_ge_u64_e64 null, 0xaf123456, vcc  ; encoding: [0x7c,0x00,0x5e,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
 
 0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_gt_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_gt_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_gt_f16_e64 s10, v1.l, v2.l        ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_gt_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_gt_f16_e64 s[10:11], v1.l, v2.l   ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_gt_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x04,0xd4,0xff,0xff,0x03,0x00
-# W32: v_cmp_gt_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x04,0xd4,0xff,0xff,0x03,0x00]
-# W64: v_cmp_gt_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x04,0xd4,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cmp_gt_f16_e64 s10, v255.l, v255.l    ; encoding: [0x0a,0x00,0x04,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_gt_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x04,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_gt_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x04,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_gt_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x04,0xd4,0xff,0xff,0x03,0x00]
 
 0x0a,0x00,0x04,0xd4,0x01,0x04,0x00,0x00
 # W32: v_cmp_gt_f16_e64 s10, s1, s2            ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x04,0x00,0x00]
@@ -1675,6 +1749,18 @@
 0x7c,0x83,0x04,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
 # GFX11: v_cmp_gt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x04,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x04,0xd4,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cmp_gt_f16_e64 s10, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x04,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_gt_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_gt_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x04,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_gt_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]
+
+0x0a,0x10,0x04,0xd4,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cmp_gt_f16_e64 s10, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x04,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_gt_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x04,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_gt_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x04,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_gt_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x04,0xd4,0xff,0xff,0x03,0x00]
+
 0x0a,0x00,0x14,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_gt_f32_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_gt_f32_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0x05,0x02,0x00]
@@ -2144,12 +2230,16 @@
 # GFX11: v_cmp_gt_u64_e64 null, 0xaf123456, vcc  ; encoding: [0x7c,0x00,0x5c,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
 
 0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_le_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_le_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_le_f16_e64 s10, v1.l, v2.l        ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_le_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_le_f16_e64 s[10:11], v1.l, v2.l   ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_le_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x03,0xd4,0xff,0xff,0x03,0x00
-# W32: v_cmp_le_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x03,0xd4,0xff,0xff,0x03,0x00]
-# W64: v_cmp_le_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x03,0xd4,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cmp_le_f16_e64 s10, v255.l, v255.l    ; encoding: [0x0a,0x00,0x03,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_le_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x03,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_le_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x03,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_le_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x03,0xd4,0xff,0xff,0x03,0x00]
 
 0x0a,0x00,0x03,0xd4,0x01,0x04,0x00,0x00
 # W32: v_cmp_le_f16_e64 s10, s1, s2            ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x04,0x00,0x00]
@@ -2202,6 +2292,18 @@
 0x7c,0x83,0x03,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
 # GFX11: v_cmp_le_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x03,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x03,0xd4,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cmp_le_f16_e64 s10, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x03,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_le_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_le_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x03,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_le_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]
+
+0x0a,0x10,0x03,0xd4,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cmp_le_f16_e64 s10, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x03,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_le_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x03,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_le_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x03,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_le_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x03,0xd4,0xff,0xff,0x03,0x00]
+
 0x0a,0x00,0x13,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_le_f32_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_le_f32_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0x05,0x02,0x00]
@@ -2671,12 +2773,16 @@
 # GFX11: v_cmp_le_u64_e64 null, 0xaf123456, vcc  ; encoding: [0x7c,0x00,0x5b,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
 
 0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_lg_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_lg_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_lg_f16_e64 s10, v1.l, v2.l        ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_lg_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_lg_f16_e64 s[10:11], v1.l, v2.l   ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_lg_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x05,0xd4,0xff,0xff,0x03,0x00
-# W32: v_cmp_lg_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x05,0xd4,0xff,0xff,0x03,0x00]
-# W64: v_cmp_lg_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x05,0xd4,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cmp_lg_f16_e64 s10, v255.l, v255.l    ; encoding: [0x0a,0x00,0x05,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_lg_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x05,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_lg_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x05,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_lg_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x05,0xd4,0xff,0xff,0x03,0x00]
 
 0x0a,0x00,0x05,0xd4,0x01,0x04,0x00,0x00
 # W32: v_cmp_lg_f16_e64 s10, s1, s2            ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x04,0x00,0x00]
@@ -2729,6 +2835,18 @@
 0x7c,0x83,0x05,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
 # GFX11: v_cmp_lg_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x05,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x05,0xd4,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cmp_lg_f16_e64 s10, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x05,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_lg_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_lg_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x05,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_lg_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]
+
+0x0a,0x10,0x05,0xd4,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cmp_lg_f16_e64 s10, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x05,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_lg_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x05,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_lg_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x05,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_lg_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x05,0xd4,0xff,0xff,0x03,0x00]
+
 0x0a,0x00,0x15,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_lg_f32_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_lg_f32_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0x05,0x02,0x00]
@@ -3741,12 +3859,16 @@
 # GFX11: v_cmp_ne_u64_e64 null, 0xaf123456, vcc  ; encoding: [0x7c,0x00,0x5d,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
 
 0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_neq_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_neq_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_neq_f16_e64 s10, v1.l, v2.l       ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_neq_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_neq_f16_e64 s[10:11], v1.l, v2.l  ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_neq_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00
-# W32: v_cmp_neq_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00]
-# W64: v_cmp_neq_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cmp_neq_f16_e64 s10, v255.l, v255.l   ; encoding: [0x0a,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_neq_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_neq_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_neq_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00]
 
 0x0a,0x00,0x0d,0xd4,0x01,0x04,0x00,0x00
 # W32: v_cmp_neq_f16_e64 s10, s1, s2           ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x04,0x00,0x00]
@@ -3799,6 +3921,18 @@
 0x7c,0x83,0x0d,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
 # GFX11: v_cmp_neq_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x0d,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x0d,0xd4,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cmp_neq_f16_e64 s10, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0d,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_neq_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_neq_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0d,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_neq_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00]
+
+0x0a,0x10,0x0d,0xd4,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cmp_neq_f16_e64 s10, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0d,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_neq_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_neq_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0d,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_neq_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00]
+
 0x0a,0x00,0x1d,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_neq_f32_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_neq_f32_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0x05,0x02,0x00]
@@ -3906,12 +4040,16 @@
 # GFX11: v_cmp_neq_f64_e64 null, 0xaf123456, -|vcc| clamp ; encoding: [0x7c,0x82,0x2d,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
 
 0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_nge_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_nge_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_nge_f16_e64 s10, v1.l, v2.l       ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_nge_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_nge_f16_e64 s[10:11], v1.l, v2.l  ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_nge_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x09,0xd4,0xff,0xff,0x03,0x00
-# W32: v_cmp_nge_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x09,0xd4,0xff,0xff,0x03,0x00]
-# W64: v_cmp_nge_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x09,0xd4,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cmp_nge_f16_e64 s10, v255.l, v255.l   ; encoding: [0x0a,0x00,0x09,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_nge_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x09,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_nge_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x09,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_nge_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x09,0xd4,0xff,0xff,0x03,0x00]
 
 0x0a,0x00,0x09,0xd4,0x01,0x04,0x00,0x00
 # W32: v_cmp_nge_f16_e64 s10, s1, s2           ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x04,0x00,0x00]
@@ -3964,6 +4102,18 @@
 0x7c,0x83,0x09,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
 # GFX11: v_cmp_nge_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x09,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x09,0xd4,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cmp_nge_f16_e64 s10, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x09,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_nge_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_nge_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x09,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_nge_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]
+
+0x0a,0x10,0x09,0xd4,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cmp_nge_f16_e64 s10, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x09,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_nge_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x09,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_nge_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x09,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_nge_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x09,0xd4,0xff,0xff,0x03,0x00]
+
 0x0a,0x00,0x19,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_nge_f32_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_nge_f32_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0x05,0x02,0x00]
@@ -4071,12 +4221,16 @@
 # GFX11: v_cmp_nge_f64_e64 null, 0xaf123456, -|vcc| clamp ; encoding: [0x7c,0x82,0x29,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
 
 0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_ngt_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_ngt_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_ngt_f16_e64 s10, v1.l, v2.l       ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_ngt_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_ngt_f16_e64 s[10:11], v1.l, v2.l  ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_ngt_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00
-# W32: v_cmp_ngt_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00]
-# W64: v_cmp_ngt_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cmp_ngt_f16_e64 s10, v255.l, v255.l   ; encoding: [0x0a,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_ngt_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_ngt_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_ngt_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00]
 
 0x0a,0x00,0x0b,0xd4,0x01,0x04,0x00,0x00
 # W32: v_cmp_ngt_f16_e64 s10, s1, s2           ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x04,0x00,0x00]
@@ -4129,6 +4283,18 @@
 0x7c,0x83,0x0b,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
 # GFX11: v_cmp_ngt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x0b,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x0b,0xd4,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cmp_ngt_f16_e64 s10, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0b,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_ngt_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_ngt_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0b,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_ngt_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00]
+
+0x0a,0x10,0x0b,0xd4,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cmp_ngt_f16_e64 s10, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0b,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_ngt_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_ngt_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0b,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_ngt_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00]
+
 0x0a,0x00,0x1b,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_ngt_f32_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_ngt_f32_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0x05,0x02,0x00]
@@ -4236,12 +4402,16 @@
 # GFX11: v_cmp_ngt_f64_e64 null, 0xaf123456, -|vcc| clamp ; encoding: [0x7c,0x82,0x2b,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
 
 0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_nle_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_nle_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_nle_f16_e64 s10, v1.l, v2.l       ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_nle_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_nle_f16_e64 s[10:11], v1.l, v2.l  ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_nle_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00
-# W32: v_cmp_nle_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00]
-# W64: v_cmp_nle_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cmp_nle_f16_e64 s10, v255.l, v255.l   ; encoding: [0x0a,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_nle_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_nle_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_nle_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00]
 
 0x0a,0x00,0x0c,0xd4,0x01,0x04,0x00,0x00
 # W32: v_cmp_nle_f16_e64 s10, s1, s2           ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x04,0x00,0x00]
@@ -4294,6 +4464,18 @@
 0x7c,0x83,0x0c,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
 # GFX11: v_cmp_nle_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x0c,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x0c,0xd4,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cmp_nle_f16_e64 s10, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0c,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_nle_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_nle_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0c,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_nle_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00]
+
+0x0a,0x10,0x0c,0xd4,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cmp_nle_f16_e64 s10, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0c,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_nle_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_nle_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0c,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_nle_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00]
+
 0x0a,0x00,0x1c,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_nle_f32_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_nle_f32_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0x05,0x02,0x00]
@@ -4401,12 +4583,16 @@
 # GFX11: v_cmp_nle_f64_e64 null, 0xaf123456, -|vcc| clamp ; encoding: [0x7c,0x82,0x2c,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
 
 0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_nlg_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_nlg_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_nlg_f16_e64 s10, v1.l, v2.l       ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_nlg_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_nlg_f16_e64 s[10:11], v1.l, v2.l  ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_nlg_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00
-# W32: v_cmp_nlg_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00]
-# W64: v_cmp_nlg_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cmp_nlg_f16_e64 s10, v255.l, v255.l   ; encoding: [0x0a,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_nlg_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_nlg_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_nlg_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00]
 
 0x0a,0x00,0x0a,0xd4,0x01,0x04,0x00,0x00
 # W32: v_cmp_nlg_f16_e64 s10, s1, s2           ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x04,0x00,0x00]
@@ -4459,6 +4645,18 @@
 0x7c,0x83,0x0a,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
 # GFX11: v_cmp_nlg_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x0a,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x0a,0xd4,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cmp_nlg_f16_e64 s10, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0a,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_nlg_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_nlg_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0a,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_nlg_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]
+
+0x0a,0x10,0x0a,0xd4,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cmp_nlg_f16_e64 s10, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0a,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_nlg_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_nlg_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0a,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_nlg_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00]
+
 0x0a,0x00,0x1a,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_nlg_f32_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_nlg_f32_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0x05,0x02,0x00]
@@ -4566,12 +4764,16 @@
 # GFX11: v_cmp_nlg_f64_e64 null, 0xaf123456, -|vcc| clamp ; encoding: [0x7c,0x82,0x2a,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
 
 0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_nlt_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_nlt_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_nlt_f16_e64 s10, v1.l, v2.l       ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_nlt_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_nlt_f16_e64 s[10:11], v1.l, v2.l  ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_nlt_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00
-# W32: v_cmp_nlt_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00]
-# W64: v_cmp_nlt_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cmp_nlt_f16_e64 s10, v255.l, v255.l   ; encoding: [0x0a,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_nlt_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_nlt_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_nlt_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00]
 
 0x0a,0x00,0x0e,0xd4,0x01,0x04,0x00,0x00
 # W32: v_cmp_nlt_f16_e64 s10, s1, s2           ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x04,0x00,0x00]
@@ -4624,6 +4826,18 @@
 0x7c,0x83,0x0e,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
 # GFX11: v_cmp_nlt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x0e,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x0e,0xd4,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cmp_nlt_f16_e64 s10, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0e,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_nlt_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_nlt_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0e,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_nlt_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00]
+
+0x0a,0x10,0x0e,0xd4,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cmp_nlt_f16_e64 s10, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0e,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_nlt_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_nlt_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0e,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_nlt_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00]
+
 0x0a,0x00,0x1e,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_nlt_f32_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_nlt_f32_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0x05,0x02,0x00]
@@ -4731,12 +4945,16 @@
 # GFX11: v_cmp_nlt_f64_e64 null, 0xaf123456, -|vcc| clamp ; encoding: [0x7c,0x82,0x2e,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
 
 0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_o_f16_e64 s10, v1, v2             ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_o_f16_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_o_f16_e64 s10, v1.l, v2.l         ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_o_f16_e64 s10, v1, v2             ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_o_f16_e64 s[10:11], v1.l, v2.l    ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_o_f16_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x07,0xd4,0xff,0xff,0x03,0x00
-# W32: v_cmp_o_f16_e64 s10, v255, v255         ; encoding: [0x0a,0x00,0x07,0xd4,0xff,0xff,0x03,0x00]
-# W64: v_cmp_o_f16_e64 s[10:11], v255, v255    ; encoding: [0x0a,0x00,0x07,0xd4,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cmp_o_f16_e64 s10, v255.l, v255.l     ; encoding: [0x0a,0x00,0x07,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_o_f16_e64 s10, v255, v255         ; encoding: [0x0a,0x00,0x07,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_o_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x07,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_o_f16_e64 s[10:11], v255, v255    ; encoding: [0x0a,0x00,0x07,0xd4,0xff,0xff,0x03,0x00]
 
 0x0a,0x00,0x07,0xd4,0x01,0x04,0x00,0x00
 # W32: v_cmp_o_f16_e64 s10, s1, s2             ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x04,0x00,0x00]
@@ -4789,6 +5007,18 @@
 0x7c,0x83,0x07,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
 # GFX11: v_cmp_o_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x07,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x07,0xd4,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cmp_o_f16_e64 s10, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x07,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_o_f16_e64 s10, v1, v2             ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_o_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x07,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_o_f16_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]
+
+0x0a,0x10,0x07,0xd4,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cmp_o_f16_e64 s10, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x07,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_o_f16_e64 s10, v255, v255         ; encoding: [0x0a,0x00,0x07,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_o_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x07,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_o_f16_e64 s[10:11], v255, v255    ; encoding: [0x0a,0x00,0x07,0xd4,0xff,0xff,0x03,0x00]
+
 0x0a,0x00,0x17,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_o_f32_e64 s10, v1, v2             ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_o_f32_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0x05,0x02,0x00]
@@ -4896,12 +5126,16 @@
 # GFX11: v_cmp_o_f64_e64 null, 0xaf123456, -|vcc| clamp ; encoding: [0x7c,0x82,0x27,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
 
 0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_t_f16_e64 s10, v1, v2             ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_t_f16_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_t_f16_e64 s10, v1.l, v2.l         ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_t_f16_e64 s10, v1, v2             ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_t_f16_e64 s[10:11], v1.l, v2.l    ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_t_f16_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x0f,0xd4,0xff,0xff,0x03,0x00
-# W32: v_cmp_t_f16_e64 s10, v255, v255         ; encoding: [0x0a,0x00,0x0f,0xd4,0xff,0xff,0x03,0x00]
-# W64: v_cmp_t_f16_e64 s[10:11], v255, v255    ; encoding: [0x0a,0x00,0x0f,0xd4,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cmp_t_f16_e64 s10, v255.l, v255.l     ; encoding: [0x0a,0x00,0x0f,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_t_f16_e64 s10, v255, v255         ; encoding: [0x0a,0x00,0x0f,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_t_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x0f,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_t_f16_e64 s[10:11], v255, v255    ; encoding: [0x0a,0x00,0x0f,0xd4,0xff,0xff,0x03,0x00]
 
 0x0a,0x00,0x0f,0xd4,0x01,0x04,0x00,0x00
 # W32: v_cmp_t_f16_e64 s10, s1, s2             ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x04,0x00,0x00]
@@ -4954,6 +5188,18 @@
 0x7c,0x83,0x0f,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
 # GFX11: v_cmp_t_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x0f,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x0f,0xd4,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cmp_t_f16_e64 s10, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0f,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_t_f16_e64 s10, v1, v2             ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_t_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0f,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_t_f16_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00]
+
+0x0a,0x10,0x0f,0xd4,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cmp_t_f16_e64 s10, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0f,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_t_f16_e64 s10, v255, v255         ; encoding: [0x0a,0x00,0x0f,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_t_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0f,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_t_f16_e64 s[10:11], v255, v255    ; encoding: [0x0a,0x00,0x0f,0xd4,0xff,0xff,0x03,0x00]
+
 0x0a,0x00,0x1f,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_t_f32_e64 s10, v1, v2             ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_t_f32_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0x05,0x02,0x00]
@@ -5273,12 +5519,16 @@
 # GFX11: v_cmp_t_u64_e64 null, 0xaf123456, vcc   ; encoding: [0x7c,0x00,0x5f,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
 
 0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_u_f16_e64 s10, v1, v2             ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_u_f16_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_u_f16_e64 s10, v1.l, v2.l         ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_u_f16_e64 s10, v1, v2             ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_u_f16_e64 s[10:11], v1.l, v2.l    ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_u_f16_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x08,0xd4,0xff,0xff,0x03,0x00
-# W32: v_cmp_u_f16_e64 s10, v255, v255         ; encoding: [0x0a,0x00,0x08,0xd4,0xff,0xff,0x03,0x00]
-# W64: v_cmp_u_f16_e64 s[10:11], v255, v255    ; encoding: [0x0a,0x00,0x08,0xd4,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cmp_u_f16_e64 s10, v255.l, v255.l     ; encoding: [0x0a,0x00,0x08,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_u_f16_e64 s10, v255, v255         ; encoding: [0x0a,0x00,0x08,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_u_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x08,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_u_f16_e64 s[10:11], v255, v255    ; encoding: [0x0a,0x00,0x08,0xd4,0xff,0xff,0x03,0x00]
 
 0x0a,0x00,0x08,0xd4,0x01,0x04,0x00,0x00
 # W32: v_cmp_u_f16_e64 s10, s1, s2             ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x04,0x00,0x00]
@@ -5331,6 +5581,18 @@
 0x7c,0x83,0x08,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
 # GFX11: v_cmp_u_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x08,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x08,0xd4,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cmp_u_f16_e64 s10, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x08,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_u_f16_e64 s10, v1, v2             ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_u_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x08,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_u_f16_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]
+
+0x0a,0x10,0x08,0xd4,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cmp_u_f16_e64 s10, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x08,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_u_f16_e64 s10, v255, v255         ; encoding: [0x0a,0x00,0x08,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_u_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x08,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_u_f16_e64 s[10:11], v255, v255    ; encoding: [0x0a,0x00,0x08,0xd4,0xff,0xff,0x03,0x00]
+
 0x0a,0x00,0x18,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_u_f32_e64 s10, v1, v2             ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_u_f32_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0x05,0x02,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc.txt
index 06e83ebed9292..64f16e44065a8 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc.txt
@@ -5,64 +5,124 @@
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=W64,W64-FAKE16
 
 0x01,0x05,0xfa,0x7c
-# W32: v_cmp_class_f16_e32 vcc_lo, v1, v2      ; encoding: [0x01,0x05,0xfa,0x7c]
-# W64: v_cmp_class_f16_e32 vcc, v1, v2         ; encoding: [0x01,0x05,0xfa,0x7c]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, v1.l, v2.l  ; encoding: [0x01,0x05,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, v1.l, v2.l     ; encoding: [0x01,0x05,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, v1, v2      ; encoding: [0x01,0x05,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, v1, v2         ; encoding: [0x01,0x05,0xfa,0x7c]
 
 0x7f,0x05,0xfa,0x7c
-# W32: v_cmp_class_f16_e32 vcc_lo, v127, v2    ; encoding: [0x7f,0x05,0xfa,0x7c]
-# W64: v_cmp_class_f16_e32 vcc, v127, v2       ; encoding: [0x7f,0x05,0xfa,0x7c]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, v127.l, v2.l ; encoding: [0x7f,0x05,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, v127.l, v2.l   ; encoding: [0x7f,0x05,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, v127, v2    ; encoding: [0x7f,0x05,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, v127, v2       ; encoding: [0x7f,0x05,0xfa,0x7c]
 
 0x01,0x04,0xfa,0x7c
-# W32: v_cmp_class_f16_e32 vcc_lo, s1, v2      ; encoding: [0x01,0x04,0xfa,0x7c]
-# W64: v_cmp_class_f16_e32 vcc, s1, v2         ; encoding: [0x01,0x04,0xfa,0x7c]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, s1, v2.l    ; encoding: [0x01,0x04,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, s1, v2.l       ; encoding: [0x01,0x04,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, s1, v2      ; encoding: [0x01,0x04,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, s1, v2         ; encoding: [0x01,0x04,0xfa,0x7c]
 
 0x69,0x04,0xfa,0x7c
-# W32: v_cmp_class_f16_e32 vcc_lo, s105, v2    ; encoding: [0x69,0x04,0xfa,0x7c]
-# W64: v_cmp_class_f16_e32 vcc, s105, v2       ; encoding: [0x69,0x04,0xfa,0x7c]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, s105, v2.l  ; encoding: [0x69,0x04,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, s105, v2.l     ; encoding: [0x69,0x04,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, s105, v2    ; encoding: [0x69,0x04,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, s105, v2       ; encoding: [0x69,0x04,0xfa,0x7c]
 
 0x6a,0x04,0xfa,0x7c
-# W32: v_cmp_class_f16_e32 vcc_lo, vcc_lo, v2  ; encoding: [0x6a,0x04,0xfa,0x7c]
-# W64: v_cmp_class_f16_e32 vcc, vcc_lo, v2     ; encoding: [0x6a,0x04,0xfa,0x7c]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, vcc_lo, v2.l ; encoding: [0x6a,0x04,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, vcc_lo, v2.l   ; encoding: [0x6a,0x04,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, vcc_lo, v2  ; encoding: [0x6a,0x04,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, vcc_lo, v2     ; encoding: [0x6a,0x04,0xfa,0x7c]
 
 0x6b,0x04,0xfa,0x7c
-# W32: v_cmp_class_f16_e32 vcc_lo, vcc_hi, v2  ; encoding: [0x6b,0x04,0xfa,0x7c]
-# W64: v_cmp_class_f16_e32 vcc, vcc_hi, v2     ; encoding: [0x6b,0x04,0xfa,0x7c]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, vcc_hi, v2.l ; encoding: [0x6b,0x04,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, vcc_hi, v2.l   ; encoding: [0x6b,0x04,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, vcc_hi, v2  ; encoding: [0x6b,0x04,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, vcc_hi, v2     ; encoding: [0x6b,0x04,0xfa,0x7c]
 
 0x7b,0x04,0xfa,0x7c
-# W32: v_cmp_class_f16_e32 vcc_lo, ttmp15, v2  ; encoding: [0x7b,0x04,0xfa,0x7c]
-# W64: v_cmp_class_f16_e32 vcc, ttmp15, v2     ; encoding: [0x7b,0x04,0xfa,0x7c]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, ttmp15, v2.l ; encoding: [0x7b,0x04,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, ttmp15, v2.l   ; encoding: [0x7b,0x04,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, ttmp15, v2  ; encoding: [0x7b,0x04,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, ttmp15, v2     ; encoding: [0x7b,0x04,0xfa,0x7c]
 
 0x7d,0x04,0xfa,0x7c
-# W32: v_cmp_class_f16_e32 vcc_lo, m0, v2      ; encoding: [0x7d,0x04,0xfa,0x7c]
-# W64: v_cmp_class_f16_e32 vcc, m0, v2         ; encoding: [0x7d,0x04,0xfa,0x7c]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, m0, v2.l    ; encoding: [0x7d,0x04,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, m0, v2.l       ; encoding: [0x7d,0x04,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, m0, v2      ; encoding: [0x7d,0x04,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, m0, v2         ; encoding: [0x7d,0x04,0xfa,0x7c]
 
 0x7e,0x04,0xfa,0x7c
-# W32: v_cmp_class_f16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0xfa,0x7c]
-# W64: v_cmp_class_f16_e32 vcc, exec_lo, v2    ; encoding: [0x7e,0x04,0xfa,0x7c]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, exec_lo, v2.l ; encoding: [0x7e,0x04,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, exec_lo, v2.l  ; encoding: [0x7e,0x04,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, exec_lo, v2    ; encoding: [0x7e,0x04,0xfa,0x7c]
 
 0x7f,0x04,0xfa,0x7c
-# W32: v_cmp_class_f16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0xfa,0x7c]
-# W64: v_cmp_class_f16_e32 vcc, exec_hi, v2    ; encoding: [0x7f,0x04,0xfa,0x7c]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, exec_hi, v2.l ; encoding: [0x7f,0x04,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, exec_hi, v2.l  ; encoding: [0x7f,0x04,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, exec_hi, v2    ; encoding: [0x7f,0x04,0xfa,0x7c]
 
 0x7c,0x04,0xfa,0x7c
-# W32: v_cmp_class_f16_e32 vcc_lo, null, v2    ; encoding: [0x7c,0x04,0xfa,0x7c]
-# W64: v_cmp_class_f16_e32 vcc, null, v2       ; encoding: [0x7c,0x04,0xfa,0x7c]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, null, v2.l  ; encoding: [0x7c,0x04,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, null, v2.l     ; encoding: [0x7c,0x04,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, null, v2    ; encoding: [0x7c,0x04,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, null, v2       ; encoding: [0x7c,0x04,0xfa,0x7c]
 
 0xc1,0x04,0xfa,0x7c
-# W32: v_cmp_class_f16_e32 vcc_lo, -1, v2      ; encoding: [0xc1,0x04,0xfa,0x7c]
-# W64: v_cmp_class_f16_e32 vcc, -1, v2         ; encoding: [0xc1,0x04,0xfa,0x7c]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, -1, v2.l    ; encoding: [0xc1,0x04,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, -1, v2.l       ; encoding: [0xc1,0x04,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, -1, v2      ; encoding: [0xc1,0x04,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, -1, v2         ; encoding: [0xc1,0x04,0xfa,0x7c]
 
 0xf0,0x04,0xfa,0x7c
-# W32: v_cmp_class_f16_e32 vcc_lo, 0.5, v2     ; encoding: [0xf0,0x04,0xfa,0x7c]
-# W64: v_cmp_class_f16_e32 vcc, 0.5, v2        ; encoding: [0xf0,0x04,0xfa,0x7c]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, 0.5, v2.l   ; encoding: [0xf0,0x04,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, 0.5, v2.l      ; encoding: [0xf0,0x04,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, 0.5, v2     ; encoding: [0xf0,0x04,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, 0.5, v2        ; encoding: [0xf0,0x04,0xfa,0x7c]
 
 0xfd,0x04,0xfa,0x7c
-# W32: v_cmp_class_f16_e32 vcc_lo, src_scc, v2 ; encoding: [0xfd,0x04,0xfa,0x7c]
-# W64: v_cmp_class_f16_e32 vcc, src_scc, v2    ; encoding: [0xfd,0x04,0xfa,0x7c]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, src_scc, v2.l ; encoding: [0xfd,0x04,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, src_scc, v2.l  ; encoding: [0xfd,0x04,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, src_scc, v2 ; encoding: [0xfd,0x04,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, src_scc, v2    ; encoding: [0xfd,0x04,0xfa,0x7c]
 
 0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_class_f16_e32 vcc_lo, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_class_f16_e32 vcc, 0xfe0b, v127   ; encoding: [0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, 0xfe0b, v127   ; encoding: [0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0xfa,0x7c
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, v1.h, v2.l  ; encoding: [0x81,0x05,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, v1.h, v2.l     ; encoding: [0x81,0x05,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0xfa,0x7c]
+
+0xff,0x05,0xfa,0x7c
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, v127.h, v2.l ; encoding: [0xff,0x05,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, v127.h, v2.l   ; encoding: [0xff,0x05,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0xfa,0x7c]
+
+0xf0,0xfe,0xfa,0x7c
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, 0.5, v127.l ; encoding: [0xf0,0xfe,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, 0.5, v127.l    ; encoding: [0xf0,0xfe,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, 0.5, v127   ; encoding: [0xf0,0xfe,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, 0.5, v127      ; encoding: [0xf0,0xfe,0xfa,0x7c]
+
+0xfd,0x04,0xfb,0x7c
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, src_scc, v2.h ; encoding: [0xfd,0x04,0xfb,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, src_scc, v2.h  ; encoding: [0xfd,0x04,0xfb,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0xfb,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0xfb,0x7c]
+
+0xff,0xfe,0xfb,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xfb,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xfb,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0xfb,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0xfb,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0xfc,0x7c
 # W32: v_cmp_class_f32_e32 vcc_lo, v1, v2      ; encoding: [0x01,0x05,0xfc,0x7c]
@@ -173,64 +233,124 @@
 # W64: v_cmp_class_f64_e32 vcc, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x7c,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x04,0x7c
-# W32: v_cmp_eq_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x04,0x7c]
-# W64: v_cmp_eq_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x04,0x7c]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, v1.l, v2.l     ; encoding: [0x01,0x05,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, v1.l, v2.l        ; encoding: [0x01,0x05,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x04,0x7c]
 
 0x7f,0x05,0x04,0x7c
-# W32: v_cmp_eq_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x04,0x7c]
-# W64: v_cmp_eq_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x04,0x7c]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, v127.l, v2.l   ; encoding: [0x7f,0x05,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, v127.l, v2.l      ; encoding: [0x7f,0x05,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x04,0x7c]
 
 0x01,0x04,0x04,0x7c
-# W32: v_cmp_eq_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x04,0x7c]
-# W64: v_cmp_eq_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x04,0x7c]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, s1, v2.l       ; encoding: [0x01,0x04,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, s1, v2.l          ; encoding: [0x01,0x04,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x04,0x7c]
 
 0x69,0x04,0x04,0x7c
-# W32: v_cmp_eq_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x04,0x7c]
-# W64: v_cmp_eq_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x04,0x7c]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, s105, v2.l     ; encoding: [0x69,0x04,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, s105, v2.l        ; encoding: [0x69,0x04,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x04,0x7c]
 
 0x6a,0x04,0x04,0x7c
-# W32: v_cmp_eq_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x04,0x7c]
-# W64: v_cmp_eq_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x04,0x7c]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, vcc_lo, v2.l   ; encoding: [0x6a,0x04,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, vcc_lo, v2.l      ; encoding: [0x6a,0x04,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x04,0x7c]
 
 0x6b,0x04,0x04,0x7c
-# W32: v_cmp_eq_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x04,0x7c]
-# W64: v_cmp_eq_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x04,0x7c]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, vcc_hi, v2.l   ; encoding: [0x6b,0x04,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, vcc_hi, v2.l      ; encoding: [0x6b,0x04,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x04,0x7c]
 
 0x7b,0x04,0x04,0x7c
-# W32: v_cmp_eq_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x04,0x7c]
-# W64: v_cmp_eq_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x04,0x7c]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, ttmp15, v2.l   ; encoding: [0x7b,0x04,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, ttmp15, v2.l      ; encoding: [0x7b,0x04,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x04,0x7c]
 
 0x7d,0x04,0x04,0x7c
-# W32: v_cmp_eq_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x04,0x7c]
-# W64: v_cmp_eq_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x04,0x7c]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, m0, v2.l       ; encoding: [0x7d,0x04,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, m0, v2.l          ; encoding: [0x7d,0x04,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x04,0x7c]
 
 0x7e,0x04,0x04,0x7c
-# W32: v_cmp_eq_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x04,0x7c]
-# W64: v_cmp_eq_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x04,0x7c]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, exec_lo, v2.l  ; encoding: [0x7e,0x04,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, exec_lo, v2.l     ; encoding: [0x7e,0x04,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x04,0x7c]
 
 0x7f,0x04,0x04,0x7c
-# W32: v_cmp_eq_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x04,0x7c]
-# W64: v_cmp_eq_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x04,0x7c]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, exec_hi, v2.l  ; encoding: [0x7f,0x04,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, exec_hi, v2.l     ; encoding: [0x7f,0x04,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x04,0x7c]
 
 0x7c,0x04,0x04,0x7c
-# W32: v_cmp_eq_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x04,0x7c]
-# W64: v_cmp_eq_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x04,0x7c]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, null, v2.l     ; encoding: [0x7c,0x04,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, null, v2.l        ; encoding: [0x7c,0x04,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x04,0x7c]
 
 0xc1,0x04,0x04,0x7c
-# W32: v_cmp_eq_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x04,0x7c]
-# W64: v_cmp_eq_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x04,0x7c]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, -1, v2.l       ; encoding: [0xc1,0x04,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, -1, v2.l          ; encoding: [0xc1,0x04,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x04,0x7c]
 
 0xf0,0x04,0x04,0x7c
-# W32: v_cmp_eq_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x04,0x7c]
-# W64: v_cmp_eq_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x04,0x7c]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, 0.5, v2.l      ; encoding: [0xf0,0x04,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, 0.5, v2.l         ; encoding: [0xf0,0x04,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x04,0x7c]
 
 0xfd,0x04,0x04,0x7c
-# W32: v_cmp_eq_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x04,0x7c]
-# W64: v_cmp_eq_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x04,0x7c]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, src_scc, v2.l  ; encoding: [0xfd,0x04,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, src_scc, v2.l     ; encoding: [0xfd,0x04,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x04,0x7c]
 
 0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_eq_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_eq_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, 0xfe0b, v127.l    ; encoding: [0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x04,0x7c
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, v1.h, v2.l     ; encoding: [0x81,0x05,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, v1.h, v2.l        ; encoding: [0x81,0x05,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x04,0x7c]
+
+0xff,0x05,0x04,0x7c
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, v127.h, v2.l   ; encoding: [0xff,0x05,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, v127.h, v2.l      ; encoding: [0xff,0x05,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x04,0x7c]
+
+0xf0,0xfe,0x04,0x7c
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, 0.5, v127.l    ; encoding: [0xf0,0xfe,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, 0.5, v127.l       ; encoding: [0xf0,0xfe,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, 0.5, v127      ; encoding: [0xf0,0xfe,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, 0.5, v127         ; encoding: [0xf0,0xfe,0x04,0x7c]
+
+0xfd,0x04,0x05,0x7c
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, src_scc, v2.h  ; encoding: [0xfd,0x04,0x05,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, src_scc, v2.h     ; encoding: [0xfd,0x04,0x05,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x05,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x05,0x7c]
+
+0xff,0xfe,0x05,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x05,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, 0xfe0b, v127.h    ; encoding: [0xff,0xfe,0x05,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x05,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x05,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0x24,0x7c
 # W32: v_cmp_eq_f32_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x24,0x7c]
@@ -797,64 +917,124 @@
 # W64: v_cmp_eq_u64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xb5,0x7c,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x00,0x7c
-# W32: v_cmp_f_f16_e32 vcc_lo, v1, v2          ; encoding: [0x01,0x05,0x00,0x7c]
-# W64: v_cmp_f_f16_e32 vcc, v1, v2             ; encoding: [0x01,0x05,0x00,0x7c]
+# W32-REAL16: v_cmp_f_f16_e32 vcc_lo, v1.l, v2.l      ; encoding: [0x01,0x05,0x00,0x7c]
+# W64-REAL16: v_cmp_f_f16_e32 vcc, v1.l, v2.l         ; encoding: [0x01,0x05,0x00,0x7c]
+# W32-FAKE16: v_cmp_f_f16_e32 vcc_lo, v1, v2          ; encoding: [0x01,0x05,0x00,0x7c]
+# W64-FAKE16: v_cmp_f_f16_e32 vcc, v1, v2             ; encoding: [0x01,0x05,0x00,0x7c]
 
 0x7f,0x05,0x00,0x7c
-# W32: v_cmp_f_f16_e32 vcc_lo, v127, v2        ; encoding: [0x7f,0x05,0x00,0x7c]
-# W64: v_cmp_f_f16_e32 vcc, v127, v2           ; encoding: [0x7f,0x05,0x00,0x7c]
+# W32-REAL16: v_cmp_f_f16_e32 vcc_lo, v127.l, v2.l    ; encoding: [0x7f,0x05,0x00,0x7c]
+# W64-REAL16: v_cmp_f_f16_e32 vcc, v127.l, v2.l       ; encoding: [0x7f,0x05,0x00,0x7c]
+# W32-FAKE16: v_cmp_f_f16_e32 vcc_lo, v127, v2        ; encoding: [0x7f,0x05,0x00,0x7c]
+# W64-FAKE16: v_cmp_f_f16_e32 vcc, v127, v2           ; encoding: [0x7f,0x05,0x00,0x7c]
 
 0x01,0x04,0x00,0x7c
-# W32: v_cmp_f_f16_e32 vcc_lo, s1, v2          ; encoding: [0x01,0x04,0x00,0x7c]
-# W64: v_cmp_f_f16_e32 vcc, s1, v2             ; encoding: [0x01,0x04,0x00,0x7c]
+# W32-REAL16: v_cmp_f_f16_e32 vcc_lo, s1, v2.l        ; encoding: [0x01,0x04,0x00,0x7c]
+# W64-REAL16: v_cmp_f_f16_e32 vcc, s1, v2.l           ; encoding: [0x01,0x04,0x00,0x7c]
+# W32-FAKE16: v_cmp_f_f16_e32 vcc_lo, s1, v2          ; encoding: [0x01,0x04,0x00,0x7c]
+# W64-FAKE16: v_cmp_f_f16_e32 vcc, s1, v2             ; encoding: [0x01,0x04,0x00,0x7c]
 
 0x69,0x04,0x00,0x7c
-# W32: v_cmp_f_f16_e32 vcc_lo, s105, v2        ; encoding: [0x69,0x04,0x00,0x7c]
-# W64: v_cmp_f_f16_e32 vcc, s105, v2           ; encoding: [0x69,0x04,0x00,0x7c]
+# W32-REAL16: v_cmp_f_f16_e32 vcc_lo, s105, v2.l      ; encoding: [0x69,0x04,0x00,0x7c]
+# W64-REAL16: v_cmp_f_f16_e32 vcc, s105, v2.l         ; encoding: [0x69,0x04,0x00,0x7c]
+# W32-FAKE16: v_cmp_f_f16_e32 vcc_lo, s105, v2        ; encoding: [0x69,0x04,0x00,0x7c]
+# W64-FAKE16: v_cmp_f_f16_e32 vcc, s105, v2           ; encoding: [0x69,0x04,0x00,0x7c]
 
 0x6a,0x04,0x00,0x7c
-# W32: v_cmp_f_f16_e32 vcc_lo, vcc_lo, v2      ; encoding: [0x6a,0x04,0x00,0x7c]
-# W64: v_cmp_f_f16_e32 vcc, vcc_lo, v2         ; encoding: [0x6a,0x04,0x00,0x7c]
+# W32-REAL16: v_cmp_f_f16_e32 vcc_lo, vcc_lo, v2.l    ; encoding: [0x6a,0x04,0x00,0x7c]
+# W64-REAL16: v_cmp_f_f16_e32 vcc, vcc_lo, v2.l       ; encoding: [0x6a,0x04,0x00,0x7c]
+# W32-FAKE16: v_cmp_f_f16_e32 vcc_lo, vcc_lo, v2      ; encoding: [0x6a,0x04,0x00,0x7c]
+# W64-FAKE16: v_cmp_f_f16_e32 vcc, vcc_lo, v2         ; encoding: [0x6a,0x04,0x00,0x7c]
 
 0x6b,0x04,0x00,0x7c
-# W32: v_cmp_f_f16_e32 vcc_lo, vcc_hi, v2      ; encoding: [0x6b,0x04,0x00,0x7c]
-# W64: v_cmp_f_f16_e32 vcc, vcc_hi, v2         ; encoding: [0x6b,0x04,0x00,0x7c]
+# W32-REAL16: v_cmp_f_f16_e32 vcc_lo, vcc_hi, v2.l    ; encoding: [0x6b,0x04,0x00,0x7c]
+# W64-REAL16: v_cmp_f_f16_e32 vcc, vcc_hi, v2.l       ; encoding: [0x6b,0x04,0x00,0x7c]
+# W32-FAKE16: v_cmp_f_f16_e32 vcc_lo, vcc_hi, v2      ; encoding: [0x6b,0x04,0x00,0x7c]
+# W64-FAKE16: v_cmp_f_f16_e32 vcc, vcc_hi, v2         ; encoding: [0x6b,0x04,0x00,0x7c]
 
 0x7b,0x04,0x00,0x7c
-# W32: v_cmp_f_f16_e32 vcc_lo, ttmp15, v2      ; encoding: [0x7b,0x04,0x00,0x7c]
-# W64: v_cmp_f_f16_e32 vcc, ttmp15, v2         ; encoding: [0x7b,0x04,0x00,0x7c]
+# W32-REAL16: v_cmp_f_f16_e32 vcc_lo, ttmp15, v2.l    ; encoding: [0x7b,0x04,0x00,0x7c]
+# W64-REAL16: v_cmp_f_f16_e32 vcc, ttmp15, v2.l       ; encoding: [0x7b,0x04,0x00,0x7c]
+# W32-FAKE16: v_cmp_f_f16_e32 vcc_lo, ttmp15, v2      ; encoding: [0x7b,0x04,0x00,0x7c]
+# W64-FAKE16: v_cmp_f_f16_e32 vcc, ttmp15, v2         ; encoding: [0x7b,0x04,0x00,0x7c]
 
 0x7d,0x04,0x00,0x7c
-# W32: v_cmp_f_f16_e32 vcc_lo, m0, v2          ; encoding: [0x7d,0x04,0x00,0x7c]
-# W64: v_cmp_f_f16_e32 vcc, m0, v2             ; encoding: [0x7d,0x04,0x00,0x7c]
+# W32-REAL16: v_cmp_f_f16_e32 vcc_lo, m0, v2.l        ; encoding: [0x7d,0x04,0x00,0x7c]
+# W64-REAL16: v_cmp_f_f16_e32 vcc, m0, v2.l           ; encoding: [0x7d,0x04,0x00,0x7c]
+# W32-FAKE16: v_cmp_f_f16_e32 vcc_lo, m0, v2          ; encoding: [0x7d,0x04,0x00,0x7c]
+# W64-FAKE16: v_cmp_f_f16_e32 vcc, m0, v2             ; encoding: [0x7d,0x04,0x00,0x7c]
 
 0x7e,0x04,0x00,0x7c
-# W32: v_cmp_f_f16_e32 vcc_lo, exec_lo, v2     ; encoding: [0x7e,0x04,0x00,0x7c]
-# W64: v_cmp_f_f16_e32 vcc, exec_lo, v2        ; encoding: [0x7e,0x04,0x00,0x7c]
+# W32-REAL16: v_cmp_f_f16_e32 vcc_lo, exec_lo, v2.l   ; encoding: [0x7e,0x04,0x00,0x7c]
+# W64-REAL16: v_cmp_f_f16_e32 vcc, exec_lo, v2.l      ; encoding: [0x7e,0x04,0x00,0x7c]
+# W32-FAKE16: v_cmp_f_f16_e32 vcc_lo, exec_lo, v2     ; encoding: [0x7e,0x04,0x00,0x7c]
+# W64-FAKE16: v_cmp_f_f16_e32 vcc, exec_lo, v2        ; encoding: [0x7e,0x04,0x00,0x7c]
 
 0x7f,0x04,0x00,0x7c
-# W32: v_cmp_f_f16_e32 vcc_lo, exec_hi, v2     ; encoding: [0x7f,0x04,0x00,0x7c]
-# W64: v_cmp_f_f16_e32 vcc, exec_hi, v2        ; encoding: [0x7f,0x04,0x00,0x7c]
+# W32-REAL16: v_cmp_f_f16_e32 vcc_lo, exec_hi, v2.l   ; encoding: [0x7f,0x04,0x00,0x7c]
+# W64-REAL16: v_cmp_f_f16_e32 vcc, exec_hi, v2.l      ; encoding: [0x7f,0x04,0x00,0x7c]
+# W32-FAKE16: v_cmp_f_f16_e32 vcc_lo, exec_hi, v2     ; encoding: [0x7f,0x04,0x00,0x7c]
+# W64-FAKE16: v_cmp_f_f16_e32 vcc, exec_hi, v2        ; encoding: [0x7f,0x04,0x00,0x7c]
 
 0x7c,0x04,0x00,0x7c
-# W32: v_cmp_f_f16_e32 vcc_lo, null, v2        ; encoding: [0x7c,0x04,0x00,0x7c]
-# W64: v_cmp_f_f16_e32 vcc, null, v2           ; encoding: [0x7c,0x04,0x00,0x7c]
+# W32-REAL16: v_cmp_f_f16_e32 vcc_lo, null, v2.l      ; encoding: [0x7c,0x04,0x00,0x7c]
+# W64-REAL16: v_cmp_f_f16_e32 vcc, null, v2.l         ; encoding: [0x7c,0x04,0x00,0x7c]
+# W32-FAKE16: v_cmp_f_f16_e32 vcc_lo, null, v2        ; encoding: [0x7c,0x04,0x00,0x7c]
+# W64-FAKE16: v_cmp_f_f16_e32 vcc, null, v2           ; encoding: [0x7c,0x04,0x00,0x7c]
 
 0xc1,0x04,0x00,0x7c
-# W32: v_cmp_f_f16_e32 vcc_lo, -1, v2          ; encoding: [0xc1,0x04,0x00,0x7c]
-# W64: v_cmp_f_f16_e32 vcc, -1, v2             ; encoding: [0xc1,0x04,0x00,0x7c]
+# W32-REAL16: v_cmp_f_f16_e32 vcc_lo, -1, v2.l        ; encoding: [0xc1,0x04,0x00,0x7c]
+# W64-REAL16: v_cmp_f_f16_e32 vcc, -1, v2.l           ; encoding: [0xc1,0x04,0x00,0x7c]
+# W32-FAKE16: v_cmp_f_f16_e32 vcc_lo, -1, v2          ; encoding: [0xc1,0x04,0x00,0x7c]
+# W64-FAKE16: v_cmp_f_f16_e32 vcc, -1, v2             ; encoding: [0xc1,0x04,0x00,0x7c]
 
 0xf0,0x04,0x00,0x7c
-# W32: v_cmp_f_f16_e32 vcc_lo, 0.5, v2         ; encoding: [0xf0,0x04,0x00,0x7c]
-# W64: v_cmp_f_f16_e32 vcc, 0.5, v2            ; encoding: [0xf0,0x04,0x00,0x7c]
+# W32-REAL16: v_cmp_f_f16_e32 vcc_lo, 0.5, v2.l       ; encoding: [0xf0,0x04,0x00,0x7c]
+# W64-REAL16: v_cmp_f_f16_e32 vcc, 0.5, v2.l          ; encoding: [0xf0,0x04,0x00,0x7c]
+# W32-FAKE16: v_cmp_f_f16_e32 vcc_lo, 0.5, v2         ; encoding: [0xf0,0x04,0x00,0x7c]
+# W64-FAKE16: v_cmp_f_f16_e32 vcc, 0.5, v2            ; encoding: [0xf0,0x04,0x00,0x7c]
 
 0xfd,0x04,0x00,0x7c
-# W32: v_cmp_f_f16_e32 vcc_lo, src_scc, v2     ; encoding: [0xfd,0x04,0x00,0x7c]
-# W64: v_cmp_f_f16_e32 vcc, src_scc, v2        ; encoding: [0xfd,0x04,0x00,0x7c]
+# W32-REAL16: v_cmp_f_f16_e32 vcc_lo, src_scc, v2.l   ; encoding: [0xfd,0x04,0x00,0x7c]
+# W64-REAL16: v_cmp_f_f16_e32 vcc, src_scc, v2.l      ; encoding: [0xfd,0x04,0x00,0x7c]
+# W32-FAKE16: v_cmp_f_f16_e32 vcc_lo, src_scc, v2     ; encoding: [0xfd,0x04,0x00,0x7c]
+# W64-FAKE16: v_cmp_f_f16_e32 vcc, src_scc, v2        ; encoding: [0xfd,0x04,0x00,0x7c]
 
 0xff,0xfe,0x00,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_f_f16_e32 vcc_lo, 0xfe0b, v127    ; encoding: [0xff,0xfe,0x00,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_f_f16_e32 vcc, 0xfe0b, v127       ; encoding: [0xff,0xfe,0x00,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_f_f16_e32 vcc_lo, 0xfe0b, v127.l  ; encoding: [0xff,0xfe,0x00,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_f_f16_e32 vcc, 0xfe0b, v127.l     ; encoding: [0xff,0xfe,0x00,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_f_f16_e32 vcc_lo, 0xfe0b, v127    ; encoding: [0xff,0xfe,0x00,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_f_f16_e32 vcc, 0xfe0b, v127       ; encoding: [0xff,0xfe,0x00,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x00,0x7c
+# W32-REAL16: v_cmp_f_f16_e32 vcc_lo, v1.h, v2.l      ; encoding: [0x81,0x05,0x00,0x7c]
+# W64-REAL16: v_cmp_f_f16_e32 vcc, v1.h, v2.l         ; encoding: [0x81,0x05,0x00,0x7c]
+# W32-FAKE16: v_cmp_f_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x00,0x7c]
+# W64-FAKE16: v_cmp_f_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x00,0x7c]
+
+0xff,0x05,0x00,0x7c
+# W32-REAL16: v_cmp_f_f16_e32 vcc_lo, v127.h, v2.l    ; encoding: [0xff,0x05,0x00,0x7c]
+# W64-REAL16: v_cmp_f_f16_e32 vcc, v127.h, v2.l       ; encoding: [0xff,0x05,0x00,0x7c]
+# W32-FAKE16: v_cmp_f_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x00,0x7c]
+# W64-FAKE16: v_cmp_f_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x00,0x7c]
+
+0xf0,0xfe,0x00,0x7c
+# W32-REAL16: v_cmp_f_f16_e32 vcc_lo, 0.5, v127.l     ; encoding: [0xf0,0xfe,0x00,0x7c]
+# W64-REAL16: v_cmp_f_f16_e32 vcc, 0.5, v127.l        ; encoding: [0xf0,0xfe,0x00,0x7c]
+# W32-FAKE16: v_cmp_f_f16_e32 vcc_lo, 0.5, v127       ; encoding: [0xf0,0xfe,0x00,0x7c]
+# W64-FAKE16: v_cmp_f_f16_e32 vcc, 0.5, v127          ; encoding: [0xf0,0xfe,0x00,0x7c]
+
+0xfd,0x04,0x01,0x7c
+# W32-REAL16: v_cmp_f_f16_e32 vcc_lo, src_scc, v2.h   ; encoding: [0xfd,0x04,0x01,0x7c]
+# W64-REAL16: v_cmp_f_f16_e32 vcc, src_scc, v2.h      ; encoding: [0xfd,0x04,0x01,0x7c]
+# W32-FAKE16: v_cmp_f_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x01,0x7c]
+# W64-FAKE16: v_cmp_f_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x01,0x7c]
+
+0xff,0xfe,0x01,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_f_f16_e32 vcc_lo, 0xfe0b, v127.h  ; encoding: [0xff,0xfe,0x01,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_f_f16_e32 vcc, 0xfe0b, v127.h     ; encoding: [0xff,0xfe,0x01,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_f_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x01,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_f_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x01,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0x20,0x7c
 # W32: v_cmp_f_f32_e32 vcc_lo, v1, v2          ; encoding: [0x01,0x05,0x20,0x7c]
@@ -1181,64 +1361,124 @@
 # W64: v_cmp_f_u64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xb1,0x7c,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x0c,0x7c
-# W32: v_cmp_ge_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x0c,0x7c]
-# W64: v_cmp_ge_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x0c,0x7c]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, v1.l, v2.l     ; encoding: [0x01,0x05,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, v1.l, v2.l        ; encoding: [0x01,0x05,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x0c,0x7c]
 
 0x7f,0x05,0x0c,0x7c
-# W32: v_cmp_ge_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x0c,0x7c]
-# W64: v_cmp_ge_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x0c,0x7c]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, v127.l, v2.l   ; encoding: [0x7f,0x05,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, v127.l, v2.l      ; encoding: [0x7f,0x05,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x0c,0x7c]
 
 0x01,0x04,0x0c,0x7c
-# W32: v_cmp_ge_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x0c,0x7c]
-# W64: v_cmp_ge_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x0c,0x7c]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, s1, v2.l       ; encoding: [0x01,0x04,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, s1, v2.l          ; encoding: [0x01,0x04,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x0c,0x7c]
 
 0x69,0x04,0x0c,0x7c
-# W32: v_cmp_ge_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x0c,0x7c]
-# W64: v_cmp_ge_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x0c,0x7c]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, s105, v2.l     ; encoding: [0x69,0x04,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, s105, v2.l        ; encoding: [0x69,0x04,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x0c,0x7c]
 
 0x6a,0x04,0x0c,0x7c
-# W32: v_cmp_ge_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x0c,0x7c]
-# W64: v_cmp_ge_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x0c,0x7c]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, vcc_lo, v2.l   ; encoding: [0x6a,0x04,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, vcc_lo, v2.l      ; encoding: [0x6a,0x04,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x0c,0x7c]
 
 0x6b,0x04,0x0c,0x7c
-# W32: v_cmp_ge_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x0c,0x7c]
-# W64: v_cmp_ge_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x0c,0x7c]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, vcc_hi, v2.l   ; encoding: [0x6b,0x04,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, vcc_hi, v2.l      ; encoding: [0x6b,0x04,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x0c,0x7c]
 
 0x7b,0x04,0x0c,0x7c
-# W32: v_cmp_ge_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x0c,0x7c]
-# W64: v_cmp_ge_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x0c,0x7c]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, ttmp15, v2.l   ; encoding: [0x7b,0x04,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, ttmp15, v2.l      ; encoding: [0x7b,0x04,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x0c,0x7c]
 
 0x7d,0x04,0x0c,0x7c
-# W32: v_cmp_ge_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x0c,0x7c]
-# W64: v_cmp_ge_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x0c,0x7c]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, m0, v2.l       ; encoding: [0x7d,0x04,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, m0, v2.l          ; encoding: [0x7d,0x04,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x0c,0x7c]
 
 0x7e,0x04,0x0c,0x7c
-# W32: v_cmp_ge_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x0c,0x7c]
-# W64: v_cmp_ge_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x0c,0x7c]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, exec_lo, v2.l  ; encoding: [0x7e,0x04,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, exec_lo, v2.l     ; encoding: [0x7e,0x04,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x0c,0x7c]
 
 0x7f,0x04,0x0c,0x7c
-# W32: v_cmp_ge_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x0c,0x7c]
-# W64: v_cmp_ge_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x0c,0x7c]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, exec_hi, v2.l  ; encoding: [0x7f,0x04,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, exec_hi, v2.l     ; encoding: [0x7f,0x04,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x0c,0x7c]
 
 0x7c,0x04,0x0c,0x7c
-# W32: v_cmp_ge_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x0c,0x7c]
-# W64: v_cmp_ge_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x0c,0x7c]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, null, v2.l     ; encoding: [0x7c,0x04,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, null, v2.l        ; encoding: [0x7c,0x04,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x0c,0x7c]
 
 0xc1,0x04,0x0c,0x7c
-# W32: v_cmp_ge_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x0c,0x7c]
-# W64: v_cmp_ge_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x0c,0x7c]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, -1, v2.l       ; encoding: [0xc1,0x04,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, -1, v2.l          ; encoding: [0xc1,0x04,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x0c,0x7c]
 
 0xf0,0x04,0x0c,0x7c
-# W32: v_cmp_ge_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x0c,0x7c]
-# W64: v_cmp_ge_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x0c,0x7c]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, 0.5, v2.l      ; encoding: [0xf0,0x04,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, 0.5, v2.l         ; encoding: [0xf0,0x04,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x0c,0x7c]
 
 0xfd,0x04,0x0c,0x7c
-# W32: v_cmp_ge_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x0c,0x7c]
-# W64: v_cmp_ge_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x0c,0x7c]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, src_scc, v2.l  ; encoding: [0xfd,0x04,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, src_scc, v2.l     ; encoding: [0xfd,0x04,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x0c,0x7c]
 
 0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_ge_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_ge_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, 0xfe0b, v127.l    ; encoding: [0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x0c,0x7c
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, v1.h, v2.l     ; encoding: [0x81,0x05,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, v1.h, v2.l        ; encoding: [0x81,0x05,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x0c,0x7c]
+
+0xff,0x05,0x0c,0x7c
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, v127.h, v2.l   ; encoding: [0xff,0x05,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, v127.h, v2.l      ; encoding: [0xff,0x05,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x0c,0x7c]
+
+0xf0,0xfe,0x0c,0x7c
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, 0.5, v127.l    ; encoding: [0xf0,0xfe,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, 0.5, v127.l       ; encoding: [0xf0,0xfe,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, 0.5, v127      ; encoding: [0xf0,0xfe,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, 0.5, v127         ; encoding: [0xf0,0xfe,0x0c,0x7c]
+
+0xfd,0x04,0x0d,0x7c
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, src_scc, v2.h  ; encoding: [0xfd,0x04,0x0d,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, src_scc, v2.h     ; encoding: [0xfd,0x04,0x0d,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x0d,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x0d,0x7c]
+
+0xff,0xfe,0x0d,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x0d,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, 0xfe0b, v127.h    ; encoding: [0xff,0xfe,0x0d,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x0d,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x0d,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0x2c,0x7c
 # W32: v_cmp_ge_f32_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x2c,0x7c]
@@ -1805,64 +2045,124 @@
 # W64: v_cmp_ge_u64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xbd,0x7c,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x08,0x7c
-# W32: v_cmp_gt_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x08,0x7c]
-# W64: v_cmp_gt_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x08,0x7c]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, v1.l, v2.l     ; encoding: [0x01,0x05,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, v1.l, v2.l        ; encoding: [0x01,0x05,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x08,0x7c]
 
 0x7f,0x05,0x08,0x7c
-# W32: v_cmp_gt_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x08,0x7c]
-# W64: v_cmp_gt_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x08,0x7c]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, v127.l, v2.l   ; encoding: [0x7f,0x05,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, v127.l, v2.l      ; encoding: [0x7f,0x05,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x08,0x7c]
 
 0x01,0x04,0x08,0x7c
-# W32: v_cmp_gt_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x08,0x7c]
-# W64: v_cmp_gt_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x08,0x7c]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, s1, v2.l       ; encoding: [0x01,0x04,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, s1, v2.l          ; encoding: [0x01,0x04,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x08,0x7c]
 
 0x69,0x04,0x08,0x7c
-# W32: v_cmp_gt_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x08,0x7c]
-# W64: v_cmp_gt_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x08,0x7c]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, s105, v2.l     ; encoding: [0x69,0x04,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, s105, v2.l        ; encoding: [0x69,0x04,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x08,0x7c]
 
 0x6a,0x04,0x08,0x7c
-# W32: v_cmp_gt_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x08,0x7c]
-# W64: v_cmp_gt_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x08,0x7c]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, vcc_lo, v2.l   ; encoding: [0x6a,0x04,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, vcc_lo, v2.l      ; encoding: [0x6a,0x04,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x08,0x7c]
 
 0x6b,0x04,0x08,0x7c
-# W32: v_cmp_gt_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x08,0x7c]
-# W64: v_cmp_gt_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x08,0x7c]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, vcc_hi, v2.l   ; encoding: [0x6b,0x04,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, vcc_hi, v2.l      ; encoding: [0x6b,0x04,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x08,0x7c]
 
 0x7b,0x04,0x08,0x7c
-# W32: v_cmp_gt_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x08,0x7c]
-# W64: v_cmp_gt_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x08,0x7c]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, ttmp15, v2.l   ; encoding: [0x7b,0x04,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, ttmp15, v2.l      ; encoding: [0x7b,0x04,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x08,0x7c]
 
 0x7d,0x04,0x08,0x7c
-# W32: v_cmp_gt_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x08,0x7c]
-# W64: v_cmp_gt_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x08,0x7c]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, m0, v2.l       ; encoding: [0x7d,0x04,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, m0, v2.l          ; encoding: [0x7d,0x04,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x08,0x7c]
 
 0x7e,0x04,0x08,0x7c
-# W32: v_cmp_gt_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x08,0x7c]
-# W64: v_cmp_gt_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x08,0x7c]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, exec_lo, v2.l  ; encoding: [0x7e,0x04,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, exec_lo, v2.l     ; encoding: [0x7e,0x04,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x08,0x7c]
 
 0x7f,0x04,0x08,0x7c
-# W32: v_cmp_gt_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x08,0x7c]
-# W64: v_cmp_gt_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x08,0x7c]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, exec_hi, v2.l  ; encoding: [0x7f,0x04,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, exec_hi, v2.l     ; encoding: [0x7f,0x04,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x08,0x7c]
 
 0x7c,0x04,0x08,0x7c
-# W32: v_cmp_gt_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x08,0x7c]
-# W64: v_cmp_gt_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x08,0x7c]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, null, v2.l     ; encoding: [0x7c,0x04,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, null, v2.l        ; encoding: [0x7c,0x04,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x08,0x7c]
 
 0xc1,0x04,0x08,0x7c
-# W32: v_cmp_gt_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x08,0x7c]
-# W64: v_cmp_gt_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x08,0x7c]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, -1, v2.l       ; encoding: [0xc1,0x04,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, -1, v2.l          ; encoding: [0xc1,0x04,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x08,0x7c]
 
 0xf0,0x04,0x08,0x7c
-# W32: v_cmp_gt_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x08,0x7c]
-# W64: v_cmp_gt_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x08,0x7c]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, 0.5, v2.l      ; encoding: [0xf0,0x04,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, 0.5, v2.l         ; encoding: [0xf0,0x04,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x08,0x7c]
 
 0xfd,0x04,0x08,0x7c
-# W32: v_cmp_gt_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x08,0x7c]
-# W64: v_cmp_gt_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x08,0x7c]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, src_scc, v2.l  ; encoding: [0xfd,0x04,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, src_scc, v2.l     ; encoding: [0xfd,0x04,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x08,0x7c]
 
 0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_gt_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_gt_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, 0xfe0b, v127.l    ; encoding: [0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x08,0x7c
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, v1.h, v2.l     ; encoding: [0x81,0x05,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, v1.h, v2.l        ; encoding: [0x81,0x05,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x08,0x7c]
+
+0xff,0x05,0x08,0x7c
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, v127.h, v2.l   ; encoding: [0xff,0x05,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, v127.h, v2.l      ; encoding: [0xff,0x05,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x08,0x7c]
+
+0xf0,0xfe,0x08,0x7c
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, 0.5, v127.l    ; encoding: [0xf0,0xfe,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, 0.5, v127.l       ; encoding: [0xf0,0xfe,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, 0.5, v127      ; encoding: [0xf0,0xfe,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, 0.5, v127         ; encoding: [0xf0,0xfe,0x08,0x7c]
+
+0xfd,0x04,0x09,0x7c
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, src_scc, v2.h  ; encoding: [0xfd,0x04,0x09,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, src_scc, v2.h     ; encoding: [0xfd,0x04,0x09,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x09,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x09,0x7c]
+
+0xff,0xfe,0x09,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x09,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, 0xfe0b, v127.h    ; encoding: [0xff,0xfe,0x09,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x09,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x09,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0x28,0x7c
 # W32: v_cmp_gt_f32_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x28,0x7c]
@@ -2429,64 +2729,124 @@
 # W64: v_cmp_gt_u64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xb9,0x7c,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x06,0x7c
-# W32: v_cmp_le_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x06,0x7c]
-# W64: v_cmp_le_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x06,0x7c]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, v1.l, v2.l     ; encoding: [0x01,0x05,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, v1.l, v2.l        ; encoding: [0x01,0x05,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x06,0x7c]
 
 0x7f,0x05,0x06,0x7c
-# W32: v_cmp_le_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x06,0x7c]
-# W64: v_cmp_le_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x06,0x7c]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, v127.l, v2.l   ; encoding: [0x7f,0x05,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, v127.l, v2.l      ; encoding: [0x7f,0x05,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x06,0x7c]
 
 0x01,0x04,0x06,0x7c
-# W32: v_cmp_le_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x06,0x7c]
-# W64: v_cmp_le_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x06,0x7c]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, s1, v2.l       ; encoding: [0x01,0x04,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, s1, v2.l          ; encoding: [0x01,0x04,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x06,0x7c]
 
 0x69,0x04,0x06,0x7c
-# W32: v_cmp_le_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x06,0x7c]
-# W64: v_cmp_le_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x06,0x7c]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, s105, v2.l     ; encoding: [0x69,0x04,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, s105, v2.l        ; encoding: [0x69,0x04,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x06,0x7c]
 
 0x6a,0x04,0x06,0x7c
-# W32: v_cmp_le_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x06,0x7c]
-# W64: v_cmp_le_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x06,0x7c]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, vcc_lo, v2.l   ; encoding: [0x6a,0x04,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, vcc_lo, v2.l      ; encoding: [0x6a,0x04,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x06,0x7c]
 
 0x6b,0x04,0x06,0x7c
-# W32: v_cmp_le_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x06,0x7c]
-# W64: v_cmp_le_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x06,0x7c]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, vcc_hi, v2.l   ; encoding: [0x6b,0x04,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, vcc_hi, v2.l      ; encoding: [0x6b,0x04,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x06,0x7c]
 
 0x7b,0x04,0x06,0x7c
-# W32: v_cmp_le_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x06,0x7c]
-# W64: v_cmp_le_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x06,0x7c]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, ttmp15, v2.l   ; encoding: [0x7b,0x04,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, ttmp15, v2.l      ; encoding: [0x7b,0x04,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x06,0x7c]
 
 0x7d,0x04,0x06,0x7c
-# W32: v_cmp_le_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x06,0x7c]
-# W64: v_cmp_le_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x06,0x7c]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, m0, v2.l       ; encoding: [0x7d,0x04,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, m0, v2.l          ; encoding: [0x7d,0x04,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x06,0x7c]
 
 0x7e,0x04,0x06,0x7c
-# W32: v_cmp_le_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x06,0x7c]
-# W64: v_cmp_le_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x06,0x7c]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, exec_lo, v2.l  ; encoding: [0x7e,0x04,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, exec_lo, v2.l     ; encoding: [0x7e,0x04,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x06,0x7c]
 
 0x7f,0x04,0x06,0x7c
-# W32: v_cmp_le_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x06,0x7c]
-# W64: v_cmp_le_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x06,0x7c]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, exec_hi, v2.l  ; encoding: [0x7f,0x04,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, exec_hi, v2.l     ; encoding: [0x7f,0x04,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x06,0x7c]
 
 0x7c,0x04,0x06,0x7c
-# W32: v_cmp_le_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x06,0x7c]
-# W64: v_cmp_le_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x06,0x7c]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, null, v2.l     ; encoding: [0x7c,0x04,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, null, v2.l        ; encoding: [0x7c,0x04,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x06,0x7c]
 
 0xc1,0x04,0x06,0x7c
-# W32: v_cmp_le_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x06,0x7c]
-# W64: v_cmp_le_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x06,0x7c]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, -1, v2.l       ; encoding: [0xc1,0x04,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, -1, v2.l          ; encoding: [0xc1,0x04,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x06,0x7c]
 
 0xf0,0x04,0x06,0x7c
-# W32: v_cmp_le_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x06,0x7c]
-# W64: v_cmp_le_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x06,0x7c]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, 0.5, v2.l      ; encoding: [0xf0,0x04,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, 0.5, v2.l         ; encoding: [0xf0,0x04,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x06,0x7c]
 
 0xfd,0x04,0x06,0x7c
-# W32: v_cmp_le_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x06,0x7c]
-# W64: v_cmp_le_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x06,0x7c]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, src_scc, v2.l  ; encoding: [0xfd,0x04,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, src_scc, v2.l     ; encoding: [0xfd,0x04,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x06,0x7c]
 
 0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_le_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_le_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, 0xfe0b, v127.l    ; encoding: [0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x06,0x7c
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, v1.h, v2.l     ; encoding: [0x81,0x05,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, v1.h, v2.l        ; encoding: [0x81,0x05,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x06,0x7c]
+
+0xff,0x05,0x06,0x7c
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, v127.h, v2.l   ; encoding: [0xff,0x05,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, v127.h, v2.l      ; encoding: [0xff,0x05,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x06,0x7c]
+
+0xf0,0xfe,0x06,0x7c
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, 0.5, v127.l    ; encoding: [0xf0,0xfe,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, 0.5, v127.l       ; encoding: [0xf0,0xfe,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, 0.5, v127      ; encoding: [0xf0,0xfe,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, 0.5, v127         ; encoding: [0xf0,0xfe,0x06,0x7c]
+
+0xfd,0x04,0x07,0x7c
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, src_scc, v2.h  ; encoding: [0xfd,0x04,0x07,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, src_scc, v2.h     ; encoding: [0xfd,0x04,0x07,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x07,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x07,0x7c]
+
+0xff,0xfe,0x07,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x07,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, 0xfe0b, v127.h    ; encoding: [0xff,0xfe,0x07,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x07,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x07,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0x26,0x7c
 # W32: v_cmp_le_f32_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x26,0x7c]
@@ -3053,64 +3413,124 @@
 # W64: v_cmp_le_u64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xb7,0x7c,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x0a,0x7c
-# W32: v_cmp_lg_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x0a,0x7c]
-# W64: v_cmp_lg_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x0a,0x7c]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, v1.l, v2.l     ; encoding: [0x01,0x05,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, v1.l, v2.l        ; encoding: [0x01,0x05,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x0a,0x7c]
 
 0x7f,0x05,0x0a,0x7c
-# W32: v_cmp_lg_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x0a,0x7c]
-# W64: v_cmp_lg_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x0a,0x7c]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, v127.l, v2.l   ; encoding: [0x7f,0x05,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, v127.l, v2.l      ; encoding: [0x7f,0x05,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x0a,0x7c]
 
 0x01,0x04,0x0a,0x7c
-# W32: v_cmp_lg_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x0a,0x7c]
-# W64: v_cmp_lg_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x0a,0x7c]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, s1, v2.l       ; encoding: [0x01,0x04,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, s1, v2.l          ; encoding: [0x01,0x04,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x0a,0x7c]
 
 0x69,0x04,0x0a,0x7c
-# W32: v_cmp_lg_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x0a,0x7c]
-# W64: v_cmp_lg_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x0a,0x7c]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, s105, v2.l     ; encoding: [0x69,0x04,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, s105, v2.l        ; encoding: [0x69,0x04,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x0a,0x7c]
 
 0x6a,0x04,0x0a,0x7c
-# W32: v_cmp_lg_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x0a,0x7c]
-# W64: v_cmp_lg_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x0a,0x7c]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, vcc_lo, v2.l   ; encoding: [0x6a,0x04,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, vcc_lo, v2.l      ; encoding: [0x6a,0x04,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x0a,0x7c]
 
 0x6b,0x04,0x0a,0x7c
-# W32: v_cmp_lg_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x0a,0x7c]
-# W64: v_cmp_lg_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x0a,0x7c]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, vcc_hi, v2.l   ; encoding: [0x6b,0x04,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, vcc_hi, v2.l      ; encoding: [0x6b,0x04,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x0a,0x7c]
 
 0x7b,0x04,0x0a,0x7c
-# W32: v_cmp_lg_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x0a,0x7c]
-# W64: v_cmp_lg_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x0a,0x7c]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, ttmp15, v2.l   ; encoding: [0x7b,0x04,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, ttmp15, v2.l      ; encoding: [0x7b,0x04,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x0a,0x7c]
 
 0x7d,0x04,0x0a,0x7c
-# W32: v_cmp_lg_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x0a,0x7c]
-# W64: v_cmp_lg_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x0a,0x7c]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, m0, v2.l       ; encoding: [0x7d,0x04,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, m0, v2.l          ; encoding: [0x7d,0x04,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x0a,0x7c]
 
 0x7e,0x04,0x0a,0x7c
-# W32: v_cmp_lg_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x0a,0x7c]
-# W64: v_cmp_lg_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x0a,0x7c]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, exec_lo, v2.l  ; encoding: [0x7e,0x04,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, exec_lo, v2.l     ; encoding: [0x7e,0x04,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x0a,0x7c]
 
 0x7f,0x04,0x0a,0x7c
-# W32: v_cmp_lg_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x0a,0x7c]
-# W64: v_cmp_lg_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x0a,0x7c]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, exec_hi, v2.l  ; encoding: [0x7f,0x04,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, exec_hi, v2.l     ; encoding: [0x7f,0x04,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x0a,0x7c]
 
 0x7c,0x04,0x0a,0x7c
-# W32: v_cmp_lg_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x0a,0x7c]
-# W64: v_cmp_lg_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x0a,0x7c]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, null, v2.l     ; encoding: [0x7c,0x04,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, null, v2.l        ; encoding: [0x7c,0x04,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x0a,0x7c]
 
 0xc1,0x04,0x0a,0x7c
-# W32: v_cmp_lg_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x0a,0x7c]
-# W64: v_cmp_lg_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x0a,0x7c]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, -1, v2.l       ; encoding: [0xc1,0x04,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, -1, v2.l          ; encoding: [0xc1,0x04,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x0a,0x7c]
 
 0xf0,0x04,0x0a,0x7c
-# W32: v_cmp_lg_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x0a,0x7c]
-# W64: v_cmp_lg_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x0a,0x7c]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, 0.5, v2.l      ; encoding: [0xf0,0x04,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, 0.5, v2.l         ; encoding: [0xf0,0x04,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x0a,0x7c]
 
 0xfd,0x04,0x0a,0x7c
-# W32: v_cmp_lg_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x0a,0x7c]
-# W64: v_cmp_lg_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x0a,0x7c]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, src_scc, v2.l  ; encoding: [0xfd,0x04,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, src_scc, v2.l     ; encoding: [0xfd,0x04,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x0a,0x7c]
 
 0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_lg_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_lg_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, 0xfe0b, v127.l    ; encoding: [0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x0a,0x7c
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, v1.h, v2.l     ; encoding: [0x81,0x05,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, v1.h, v2.l        ; encoding: [0x81,0x05,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x0a,0x7c]
+
+0xff,0x05,0x0a,0x7c
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, v127.h, v2.l   ; encoding: [0xff,0x05,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, v127.h, v2.l      ; encoding: [0xff,0x05,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x0a,0x7c]
+
+0xf0,0xfe,0x0a,0x7c
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, 0.5, v127.l    ; encoding: [0xf0,0xfe,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, 0.5, v127.l       ; encoding: [0xf0,0xfe,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, 0.5, v127      ; encoding: [0xf0,0xfe,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, 0.5, v127         ; encoding: [0xf0,0xfe,0x0a,0x7c]
+
+0xfd,0x04,0x0b,0x7c
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, src_scc, v2.h  ; encoding: [0xfd,0x04,0x0b,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, src_scc, v2.h     ; encoding: [0xfd,0x04,0x0b,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x0b,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x0b,0x7c]
+
+0xff,0xfe,0x0b,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x0b,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, 0xfe0b, v127.h    ; encoding: [0xff,0xfe,0x0b,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x0b,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x0b,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0x2a,0x7c
 # W32: v_cmp_lg_f32_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x2a,0x7c]
@@ -4381,64 +4801,124 @@
 # W64: v_cmp_ne_u64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xbb,0x7c,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x1a,0x7c
-# W32: v_cmp_neq_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x1a,0x7c]
-# W64: v_cmp_neq_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x1a,0x7c]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, v1.l, v2.l    ; encoding: [0x01,0x05,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, v1.l, v2.l       ; encoding: [0x01,0x05,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x1a,0x7c]
 
 0x7f,0x05,0x1a,0x7c
-# W32: v_cmp_neq_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x1a,0x7c]
-# W64: v_cmp_neq_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x1a,0x7c]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, v127.l, v2.l  ; encoding: [0x7f,0x05,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, v127.l, v2.l     ; encoding: [0x7f,0x05,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x1a,0x7c]
 
 0x01,0x04,0x1a,0x7c
-# W32: v_cmp_neq_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x1a,0x7c]
-# W64: v_cmp_neq_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x1a,0x7c]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, s1, v2.l      ; encoding: [0x01,0x04,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, s1, v2.l         ; encoding: [0x01,0x04,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x1a,0x7c]
 
 0x69,0x04,0x1a,0x7c
-# W32: v_cmp_neq_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x1a,0x7c]
-# W64: v_cmp_neq_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x1a,0x7c]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, s105, v2.l    ; encoding: [0x69,0x04,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, s105, v2.l       ; encoding: [0x69,0x04,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x1a,0x7c]
 
 0x6a,0x04,0x1a,0x7c
-# W32: v_cmp_neq_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x1a,0x7c]
-# W64: v_cmp_neq_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x1a,0x7c]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, vcc_lo, v2.l  ; encoding: [0x6a,0x04,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, vcc_lo, v2.l     ; encoding: [0x6a,0x04,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x1a,0x7c]
 
 0x6b,0x04,0x1a,0x7c
-# W32: v_cmp_neq_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x1a,0x7c]
-# W64: v_cmp_neq_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x1a,0x7c]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, vcc_hi, v2.l  ; encoding: [0x6b,0x04,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, vcc_hi, v2.l     ; encoding: [0x6b,0x04,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x1a,0x7c]
 
 0x7b,0x04,0x1a,0x7c
-# W32: v_cmp_neq_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x1a,0x7c]
-# W64: v_cmp_neq_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x1a,0x7c]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, ttmp15, v2.l  ; encoding: [0x7b,0x04,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, ttmp15, v2.l     ; encoding: [0x7b,0x04,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x1a,0x7c]
 
 0x7d,0x04,0x1a,0x7c
-# W32: v_cmp_neq_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x1a,0x7c]
-# W64: v_cmp_neq_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x1a,0x7c]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, m0, v2.l      ; encoding: [0x7d,0x04,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, m0, v2.l         ; encoding: [0x7d,0x04,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x1a,0x7c]
 
 0x7e,0x04,0x1a,0x7c
-# W32: v_cmp_neq_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x1a,0x7c]
-# W64: v_cmp_neq_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x1a,0x7c]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, exec_lo, v2.l ; encoding: [0x7e,0x04,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, exec_lo, v2.l    ; encoding: [0x7e,0x04,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x1a,0x7c]
 
 0x7f,0x04,0x1a,0x7c
-# W32: v_cmp_neq_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x1a,0x7c]
-# W64: v_cmp_neq_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x1a,0x7c]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, exec_hi, v2.l ; encoding: [0x7f,0x04,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, exec_hi, v2.l    ; encoding: [0x7f,0x04,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x1a,0x7c]
 
 0x7c,0x04,0x1a,0x7c
-# W32: v_cmp_neq_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x1a,0x7c]
-# W64: v_cmp_neq_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x1a,0x7c]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, null, v2.l    ; encoding: [0x7c,0x04,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, null, v2.l       ; encoding: [0x7c,0x04,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x1a,0x7c]
 
 0xc1,0x04,0x1a,0x7c
-# W32: v_cmp_neq_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x1a,0x7c]
-# W64: v_cmp_neq_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x1a,0x7c]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, -1, v2.l      ; encoding: [0xc1,0x04,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, -1, v2.l         ; encoding: [0xc1,0x04,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x1a,0x7c]
 
 0xf0,0x04,0x1a,0x7c
-# W32: v_cmp_neq_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x1a,0x7c]
-# W64: v_cmp_neq_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x1a,0x7c]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, 0.5, v2.l     ; encoding: [0xf0,0x04,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, 0.5, v2.l        ; encoding: [0xf0,0x04,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x1a,0x7c]
 
 0xfd,0x04,0x1a,0x7c
-# W32: v_cmp_neq_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x1a,0x7c]
-# W64: v_cmp_neq_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x1a,0x7c]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, src_scc, v2.l ; encoding: [0xfd,0x04,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, src_scc, v2.l    ; encoding: [0xfd,0x04,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x1a,0x7c]
 
 0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_neq_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_neq_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, 0xfe0b, v127.l   ; encoding: [0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x1a,0x7c
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, v1.h, v2.l    ; encoding: [0x81,0x05,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, v1.h, v2.l       ; encoding: [0x81,0x05,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x1a,0x7c]
+
+0xff,0x05,0x1a,0x7c
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, v127.h, v2.l  ; encoding: [0xff,0x05,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, v127.h, v2.l     ; encoding: [0xff,0x05,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x1a,0x7c]
+
+0xf0,0xfe,0x1a,0x7c
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, 0.5, v127.l   ; encoding: [0xf0,0xfe,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, 0.5, v127.l      ; encoding: [0xf0,0xfe,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, 0.5, v127     ; encoding: [0xf0,0xfe,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, 0.5, v127        ; encoding: [0xf0,0xfe,0x1a,0x7c]
+
+0xfd,0x04,0x1b,0x7c
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, src_scc, v2.h ; encoding: [0xfd,0x04,0x1b,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, src_scc, v2.h    ; encoding: [0xfd,0x04,0x1b,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x1b,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x1b,0x7c]
+
+0xff,0xfe,0x1b,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x1b,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, 0xfe0b, v127.h   ; encoding: [0xff,0xfe,0x1b,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x1b,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x1b,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0x3a,0x7c
 # W32: v_cmp_neq_f32_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x3a,0x7c]
@@ -4549,64 +5029,124 @@
 # W64: v_cmp_neq_f64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x5b,0x7c,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x12,0x7c
-# W32: v_cmp_nge_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x12,0x7c]
-# W64: v_cmp_nge_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x12,0x7c]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, v1.l, v2.l    ; encoding: [0x01,0x05,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, v1.l, v2.l       ; encoding: [0x01,0x05,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x12,0x7c]
 
 0x7f,0x05,0x12,0x7c
-# W32: v_cmp_nge_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x12,0x7c]
-# W64: v_cmp_nge_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x12,0x7c]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, v127.l, v2.l  ; encoding: [0x7f,0x05,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, v127.l, v2.l     ; encoding: [0x7f,0x05,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x12,0x7c]
 
 0x01,0x04,0x12,0x7c
-# W32: v_cmp_nge_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x12,0x7c]
-# W64: v_cmp_nge_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x12,0x7c]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, s1, v2.l      ; encoding: [0x01,0x04,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, s1, v2.l         ; encoding: [0x01,0x04,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x12,0x7c]
 
 0x69,0x04,0x12,0x7c
-# W32: v_cmp_nge_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x12,0x7c]
-# W64: v_cmp_nge_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x12,0x7c]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, s105, v2.l    ; encoding: [0x69,0x04,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, s105, v2.l       ; encoding: [0x69,0x04,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x12,0x7c]
 
 0x6a,0x04,0x12,0x7c
-# W32: v_cmp_nge_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x12,0x7c]
-# W64: v_cmp_nge_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x12,0x7c]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, vcc_lo, v2.l  ; encoding: [0x6a,0x04,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, vcc_lo, v2.l     ; encoding: [0x6a,0x04,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x12,0x7c]
 
 0x6b,0x04,0x12,0x7c
-# W32: v_cmp_nge_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x12,0x7c]
-# W64: v_cmp_nge_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x12,0x7c]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, vcc_hi, v2.l  ; encoding: [0x6b,0x04,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, vcc_hi, v2.l     ; encoding: [0x6b,0x04,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x12,0x7c]
 
 0x7b,0x04,0x12,0x7c
-# W32: v_cmp_nge_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x12,0x7c]
-# W64: v_cmp_nge_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x12,0x7c]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, ttmp15, v2.l  ; encoding: [0x7b,0x04,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, ttmp15, v2.l     ; encoding: [0x7b,0x04,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x12,0x7c]
 
 0x7d,0x04,0x12,0x7c
-# W32: v_cmp_nge_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x12,0x7c]
-# W64: v_cmp_nge_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x12,0x7c]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, m0, v2.l      ; encoding: [0x7d,0x04,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, m0, v2.l         ; encoding: [0x7d,0x04,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x12,0x7c]
 
 0x7e,0x04,0x12,0x7c
-# W32: v_cmp_nge_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x12,0x7c]
-# W64: v_cmp_nge_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x12,0x7c]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, exec_lo, v2.l ; encoding: [0x7e,0x04,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, exec_lo, v2.l    ; encoding: [0x7e,0x04,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x12,0x7c]
 
 0x7f,0x04,0x12,0x7c
-# W32: v_cmp_nge_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x12,0x7c]
-# W64: v_cmp_nge_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x12,0x7c]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, exec_hi, v2.l ; encoding: [0x7f,0x04,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, exec_hi, v2.l    ; encoding: [0x7f,0x04,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x12,0x7c]
 
 0x7c,0x04,0x12,0x7c
-# W32: v_cmp_nge_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x12,0x7c]
-# W64: v_cmp_nge_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x12,0x7c]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, null, v2.l    ; encoding: [0x7c,0x04,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, null, v2.l       ; encoding: [0x7c,0x04,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x12,0x7c]
 
 0xc1,0x04,0x12,0x7c
-# W32: v_cmp_nge_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x12,0x7c]
-# W64: v_cmp_nge_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x12,0x7c]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, -1, v2.l      ; encoding: [0xc1,0x04,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, -1, v2.l         ; encoding: [0xc1,0x04,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x12,0x7c]
 
 0xf0,0x04,0x12,0x7c
-# W32: v_cmp_nge_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x12,0x7c]
-# W64: v_cmp_nge_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x12,0x7c]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, 0.5, v2.l     ; encoding: [0xf0,0x04,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, 0.5, v2.l        ; encoding: [0xf0,0x04,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x12,0x7c]
 
 0xfd,0x04,0x12,0x7c
-# W32: v_cmp_nge_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x12,0x7c]
-# W64: v_cmp_nge_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x12,0x7c]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, src_scc, v2.l ; encoding: [0xfd,0x04,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, src_scc, v2.l    ; encoding: [0xfd,0x04,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x12,0x7c]
 
 0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_nge_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_nge_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, 0xfe0b, v127.l   ; encoding: [0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x12,0x7c
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, v1.h, v2.l    ; encoding: [0x81,0x05,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, v1.h, v2.l       ; encoding: [0x81,0x05,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x12,0x7c]
+
+0xff,0x05,0x12,0x7c
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, v127.h, v2.l  ; encoding: [0xff,0x05,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, v127.h, v2.l     ; encoding: [0xff,0x05,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x12,0x7c]
+
+0xf0,0xfe,0x12,0x7c
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, 0.5, v127.l   ; encoding: [0xf0,0xfe,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, 0.5, v127.l      ; encoding: [0xf0,0xfe,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, 0.5, v127     ; encoding: [0xf0,0xfe,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, 0.5, v127        ; encoding: [0xf0,0xfe,0x12,0x7c]
+
+0xfd,0x04,0x13,0x7c
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, src_scc, v2.h ; encoding: [0xfd,0x04,0x13,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, src_scc, v2.h    ; encoding: [0xfd,0x04,0x13,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x13,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x13,0x7c]
+
+0xff,0xfe,0x13,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x13,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, 0xfe0b, v127.h   ; encoding: [0xff,0xfe,0x13,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x13,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x13,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0x32,0x7c
 # W32: v_cmp_nge_f32_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x32,0x7c]
@@ -4717,64 +5257,124 @@
 # W64: v_cmp_nge_f64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x53,0x7c,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x16,0x7c
-# W32: v_cmp_ngt_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x16,0x7c]
-# W64: v_cmp_ngt_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x16,0x7c]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, v1.l, v2.l    ; encoding: [0x01,0x05,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, v1.l, v2.l       ; encoding: [0x01,0x05,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x16,0x7c]
 
 0x7f,0x05,0x16,0x7c
-# W32: v_cmp_ngt_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x16,0x7c]
-# W64: v_cmp_ngt_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x16,0x7c]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, v127.l, v2.l  ; encoding: [0x7f,0x05,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, v127.l, v2.l     ; encoding: [0x7f,0x05,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x16,0x7c]
 
 0x01,0x04,0x16,0x7c
-# W32: v_cmp_ngt_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x16,0x7c]
-# W64: v_cmp_ngt_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x16,0x7c]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, s1, v2.l      ; encoding: [0x01,0x04,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, s1, v2.l         ; encoding: [0x01,0x04,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x16,0x7c]
 
 0x69,0x04,0x16,0x7c
-# W32: v_cmp_ngt_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x16,0x7c]
-# W64: v_cmp_ngt_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x16,0x7c]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, s105, v2.l    ; encoding: [0x69,0x04,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, s105, v2.l       ; encoding: [0x69,0x04,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x16,0x7c]
 
 0x6a,0x04,0x16,0x7c
-# W32: v_cmp_ngt_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x16,0x7c]
-# W64: v_cmp_ngt_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x16,0x7c]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, vcc_lo, v2.l  ; encoding: [0x6a,0x04,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, vcc_lo, v2.l     ; encoding: [0x6a,0x04,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x16,0x7c]
 
 0x6b,0x04,0x16,0x7c
-# W32: v_cmp_ngt_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x16,0x7c]
-# W64: v_cmp_ngt_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x16,0x7c]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, vcc_hi, v2.l  ; encoding: [0x6b,0x04,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, vcc_hi, v2.l     ; encoding: [0x6b,0x04,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x16,0x7c]
 
 0x7b,0x04,0x16,0x7c
-# W32: v_cmp_ngt_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x16,0x7c]
-# W64: v_cmp_ngt_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x16,0x7c]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, ttmp15, v2.l  ; encoding: [0x7b,0x04,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, ttmp15, v2.l     ; encoding: [0x7b,0x04,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x16,0x7c]
 
 0x7d,0x04,0x16,0x7c
-# W32: v_cmp_ngt_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x16,0x7c]
-# W64: v_cmp_ngt_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x16,0x7c]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, m0, v2.l      ; encoding: [0x7d,0x04,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, m0, v2.l         ; encoding: [0x7d,0x04,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x16,0x7c]
 
 0x7e,0x04,0x16,0x7c
-# W32: v_cmp_ngt_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x16,0x7c]
-# W64: v_cmp_ngt_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x16,0x7c]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, exec_lo, v2.l ; encoding: [0x7e,0x04,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, exec_lo, v2.l    ; encoding: [0x7e,0x04,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x16,0x7c]
 
 0x7f,0x04,0x16,0x7c
-# W32: v_cmp_ngt_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x16,0x7c]
-# W64: v_cmp_ngt_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x16,0x7c]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, exec_hi, v2.l ; encoding: [0x7f,0x04,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, exec_hi, v2.l    ; encoding: [0x7f,0x04,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x16,0x7c]
 
 0x7c,0x04,0x16,0x7c
-# W32: v_cmp_ngt_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x16,0x7c]
-# W64: v_cmp_ngt_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x16,0x7c]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, null, v2.l    ; encoding: [0x7c,0x04,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, null, v2.l       ; encoding: [0x7c,0x04,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x16,0x7c]
 
 0xc1,0x04,0x16,0x7c
-# W32: v_cmp_ngt_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x16,0x7c]
-# W64: v_cmp_ngt_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x16,0x7c]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, -1, v2.l      ; encoding: [0xc1,0x04,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, -1, v2.l         ; encoding: [0xc1,0x04,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x16,0x7c]
 
 0xf0,0x04,0x16,0x7c
-# W32: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x16,0x7c]
-# W64: v_cmp_ngt_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x16,0x7c]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v2.l     ; encoding: [0xf0,0x04,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, 0.5, v2.l        ; encoding: [0xf0,0x04,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x16,0x7c]
 
 0xfd,0x04,0x16,0x7c
-# W32: v_cmp_ngt_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x16,0x7c]
-# W64: v_cmp_ngt_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x16,0x7c]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, src_scc, v2.l ; encoding: [0xfd,0x04,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, src_scc, v2.l    ; encoding: [0xfd,0x04,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x16,0x7c]
 
 0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_ngt_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_ngt_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, 0xfe0b, v127.l   ; encoding: [0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x16,0x7c
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, v1.h, v2.l    ; encoding: [0x81,0x05,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, v1.h, v2.l       ; encoding: [0x81,0x05,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x16,0x7c]
+
+0xff,0x05,0x16,0x7c
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, v127.h, v2.l  ; encoding: [0xff,0x05,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, v127.h, v2.l     ; encoding: [0xff,0x05,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x16,0x7c]
+
+0xf0,0xfe,0x16,0x7c
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v127.l   ; encoding: [0xf0,0xfe,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, 0.5, v127.l      ; encoding: [0xf0,0xfe,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v127     ; encoding: [0xf0,0xfe,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, 0.5, v127        ; encoding: [0xf0,0xfe,0x16,0x7c]
+
+0xfd,0x04,0x17,0x7c
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, src_scc, v2.h ; encoding: [0xfd,0x04,0x17,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, src_scc, v2.h    ; encoding: [0xfd,0x04,0x17,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x17,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x17,0x7c]
+
+0xff,0xfe,0x17,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x17,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, 0xfe0b, v127.h   ; encoding: [0xff,0xfe,0x17,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x17,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x17,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0x36,0x7c
 # W32: v_cmp_ngt_f32_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x36,0x7c]
@@ -4885,64 +5485,124 @@
 # W64: v_cmp_ngt_f64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x57,0x7c,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x18,0x7c
-# W32: v_cmp_nle_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x18,0x7c]
-# W64: v_cmp_nle_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x18,0x7c]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, v1.l, v2.l    ; encoding: [0x01,0x05,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, v1.l, v2.l       ; encoding: [0x01,0x05,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x18,0x7c]
 
 0x7f,0x05,0x18,0x7c
-# W32: v_cmp_nle_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x18,0x7c]
-# W64: v_cmp_nle_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x18,0x7c]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, v127.l, v2.l  ; encoding: [0x7f,0x05,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, v127.l, v2.l     ; encoding: [0x7f,0x05,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x18,0x7c]
 
 0x01,0x04,0x18,0x7c
-# W32: v_cmp_nle_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x18,0x7c]
-# W64: v_cmp_nle_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x18,0x7c]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, s1, v2.l      ; encoding: [0x01,0x04,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, s1, v2.l         ; encoding: [0x01,0x04,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x18,0x7c]
 
 0x69,0x04,0x18,0x7c
-# W32: v_cmp_nle_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x18,0x7c]
-# W64: v_cmp_nle_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x18,0x7c]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, s105, v2.l    ; encoding: [0x69,0x04,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, s105, v2.l       ; encoding: [0x69,0x04,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x18,0x7c]
 
 0x6a,0x04,0x18,0x7c
-# W32: v_cmp_nle_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x18,0x7c]
-# W64: v_cmp_nle_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x18,0x7c]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, vcc_lo, v2.l  ; encoding: [0x6a,0x04,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, vcc_lo, v2.l     ; encoding: [0x6a,0x04,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x18,0x7c]
 
 0x6b,0x04,0x18,0x7c
-# W32: v_cmp_nle_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x18,0x7c]
-# W64: v_cmp_nle_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x18,0x7c]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, vcc_hi, v2.l  ; encoding: [0x6b,0x04,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, vcc_hi, v2.l     ; encoding: [0x6b,0x04,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x18,0x7c]
 
 0x7b,0x04,0x18,0x7c
-# W32: v_cmp_nle_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x18,0x7c]
-# W64: v_cmp_nle_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x18,0x7c]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, ttmp15, v2.l  ; encoding: [0x7b,0x04,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, ttmp15, v2.l     ; encoding: [0x7b,0x04,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x18,0x7c]
 
 0x7d,0x04,0x18,0x7c
-# W32: v_cmp_nle_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x18,0x7c]
-# W64: v_cmp_nle_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x18,0x7c]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, m0, v2.l      ; encoding: [0x7d,0x04,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, m0, v2.l         ; encoding: [0x7d,0x04,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x18,0x7c]
 
 0x7e,0x04,0x18,0x7c
-# W32: v_cmp_nle_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x18,0x7c]
-# W64: v_cmp_nle_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x18,0x7c]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, exec_lo, v2.l ; encoding: [0x7e,0x04,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, exec_lo, v2.l    ; encoding: [0x7e,0x04,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x18,0x7c]
 
 0x7f,0x04,0x18,0x7c
-# W32: v_cmp_nle_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x18,0x7c]
-# W64: v_cmp_nle_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x18,0x7c]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, exec_hi, v2.l ; encoding: [0x7f,0x04,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, exec_hi, v2.l    ; encoding: [0x7f,0x04,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x18,0x7c]
 
 0x7c,0x04,0x18,0x7c
-# W32: v_cmp_nle_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x18,0x7c]
-# W64: v_cmp_nle_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x18,0x7c]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, null, v2.l    ; encoding: [0x7c,0x04,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, null, v2.l       ; encoding: [0x7c,0x04,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x18,0x7c]
 
 0xc1,0x04,0x18,0x7c
-# W32: v_cmp_nle_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x18,0x7c]
-# W64: v_cmp_nle_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x18,0x7c]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, -1, v2.l      ; encoding: [0xc1,0x04,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, -1, v2.l         ; encoding: [0xc1,0x04,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x18,0x7c]
 
 0xf0,0x04,0x18,0x7c
-# W32: v_cmp_nle_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x18,0x7c]
-# W64: v_cmp_nle_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x18,0x7c]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, 0.5, v2.l     ; encoding: [0xf0,0x04,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, 0.5, v2.l        ; encoding: [0xf0,0x04,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x18,0x7c]
 
 0xfd,0x04,0x18,0x7c
-# W32: v_cmp_nle_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x18,0x7c]
-# W64: v_cmp_nle_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x18,0x7c]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, src_scc, v2.l ; encoding: [0xfd,0x04,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, src_scc, v2.l    ; encoding: [0xfd,0x04,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x18,0x7c]
 
 0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_nle_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_nle_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, 0xfe0b, v127.l   ; encoding: [0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x18,0x7c
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, v1.h, v2.l    ; encoding: [0x81,0x05,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, v1.h, v2.l       ; encoding: [0x81,0x05,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x18,0x7c]
+
+0xff,0x05,0x18,0x7c
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, v127.h, v2.l  ; encoding: [0xff,0x05,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, v127.h, v2.l     ; encoding: [0xff,0x05,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x18,0x7c]
+
+0xf0,0xfe,0x18,0x7c
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, 0.5, v127.l   ; encoding: [0xf0,0xfe,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, 0.5, v127.l      ; encoding: [0xf0,0xfe,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, 0.5, v127     ; encoding: [0xf0,0xfe,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, 0.5, v127        ; encoding: [0xf0,0xfe,0x18,0x7c]
+
+0xfd,0x04,0x19,0x7c
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, src_scc, v2.h ; encoding: [0xfd,0x04,0x19,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, src_scc, v2.h    ; encoding: [0xfd,0x04,0x19,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x19,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x19,0x7c]
+
+0xff,0xfe,0x19,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x19,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, 0xfe0b, v127.h   ; encoding: [0xff,0xfe,0x19,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x19,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x19,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0x38,0x7c
 # W32: v_cmp_nle_f32_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x38,0x7c]
@@ -5053,64 +5713,124 @@
 # W64: v_cmp_nle_f64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x59,0x7c,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x14,0x7c
-# W32: v_cmp_nlg_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x14,0x7c]
-# W64: v_cmp_nlg_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x14,0x7c]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, v1.l, v2.l    ; encoding: [0x01,0x05,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, v1.l, v2.l       ; encoding: [0x01,0x05,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x14,0x7c]
 
 0x7f,0x05,0x14,0x7c
-# W32: v_cmp_nlg_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x14,0x7c]
-# W64: v_cmp_nlg_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x14,0x7c]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, v127.l, v2.l  ; encoding: [0x7f,0x05,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, v127.l, v2.l     ; encoding: [0x7f,0x05,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x14,0x7c]
 
 0x01,0x04,0x14,0x7c
-# W32: v_cmp_nlg_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x14,0x7c]
-# W64: v_cmp_nlg_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x14,0x7c]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, s1, v2.l      ; encoding: [0x01,0x04,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, s1, v2.l         ; encoding: [0x01,0x04,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x14,0x7c]
 
 0x69,0x04,0x14,0x7c
-# W32: v_cmp_nlg_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x14,0x7c]
-# W64: v_cmp_nlg_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x14,0x7c]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, s105, v2.l    ; encoding: [0x69,0x04,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, s105, v2.l       ; encoding: [0x69,0x04,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x14,0x7c]
 
 0x6a,0x04,0x14,0x7c
-# W32: v_cmp_nlg_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x14,0x7c]
-# W64: v_cmp_nlg_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x14,0x7c]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, vcc_lo, v2.l  ; encoding: [0x6a,0x04,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, vcc_lo, v2.l     ; encoding: [0x6a,0x04,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x14,0x7c]
 
 0x6b,0x04,0x14,0x7c
-# W32: v_cmp_nlg_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x14,0x7c]
-# W64: v_cmp_nlg_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x14,0x7c]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, vcc_hi, v2.l  ; encoding: [0x6b,0x04,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, vcc_hi, v2.l     ; encoding: [0x6b,0x04,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x14,0x7c]
 
 0x7b,0x04,0x14,0x7c
-# W32: v_cmp_nlg_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x14,0x7c]
-# W64: v_cmp_nlg_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x14,0x7c]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, ttmp15, v2.l  ; encoding: [0x7b,0x04,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, ttmp15, v2.l     ; encoding: [0x7b,0x04,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x14,0x7c]
 
 0x7d,0x04,0x14,0x7c
-# W32: v_cmp_nlg_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x14,0x7c]
-# W64: v_cmp_nlg_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x14,0x7c]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, m0, v2.l      ; encoding: [0x7d,0x04,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, m0, v2.l         ; encoding: [0x7d,0x04,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x14,0x7c]
 
 0x7e,0x04,0x14,0x7c
-# W32: v_cmp_nlg_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x14,0x7c]
-# W64: v_cmp_nlg_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x14,0x7c]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, exec_lo, v2.l ; encoding: [0x7e,0x04,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, exec_lo, v2.l    ; encoding: [0x7e,0x04,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x14,0x7c]
 
 0x7f,0x04,0x14,0x7c
-# W32: v_cmp_nlg_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x14,0x7c]
-# W64: v_cmp_nlg_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x14,0x7c]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, exec_hi, v2.l ; encoding: [0x7f,0x04,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, exec_hi, v2.l    ; encoding: [0x7f,0x04,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x14,0x7c]
 
 0x7c,0x04,0x14,0x7c
-# W32: v_cmp_nlg_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x14,0x7c]
-# W64: v_cmp_nlg_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x14,0x7c]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, null, v2.l    ; encoding: [0x7c,0x04,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, null, v2.l       ; encoding: [0x7c,0x04,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x14,0x7c]
 
 0xc1,0x04,0x14,0x7c
-# W32: v_cmp_nlg_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x14,0x7c]
-# W64: v_cmp_nlg_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x14,0x7c]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, -1, v2.l      ; encoding: [0xc1,0x04,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, -1, v2.l         ; encoding: [0xc1,0x04,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x14,0x7c]
 
 0xf0,0x04,0x14,0x7c
-# W32: v_cmp_nlg_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x14,0x7c]
-# W64: v_cmp_nlg_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x14,0x7c]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, 0.5, v2.l     ; encoding: [0xf0,0x04,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, 0.5, v2.l        ; encoding: [0xf0,0x04,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x14,0x7c]
 
 0xfd,0x04,0x14,0x7c
-# W32: v_cmp_nlg_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x14,0x7c]
-# W64: v_cmp_nlg_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x14,0x7c]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, src_scc, v2.l ; encoding: [0xfd,0x04,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, src_scc, v2.l    ; encoding: [0xfd,0x04,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x14,0x7c]
 
 0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_nlg_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_nlg_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, 0xfe0b, v127.l   ; encoding: [0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x14,0x7c
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, v1.h, v2.l    ; encoding: [0x81,0x05,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, v1.h, v2.l       ; encoding: [0x81,0x05,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x14,0x7c]
+
+0xff,0x05,0x14,0x7c
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, v127.h, v2.l  ; encoding: [0xff,0x05,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, v127.h, v2.l     ; encoding: [0xff,0x05,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x14,0x7c]
+
+0xf0,0xfe,0x14,0x7c
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, 0.5, v127.l   ; encoding: [0xf0,0xfe,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, 0.5, v127.l      ; encoding: [0xf0,0xfe,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, 0.5, v127     ; encoding: [0xf0,0xfe,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, 0.5, v127        ; encoding: [0xf0,0xfe,0x14,0x7c]
+
+0xfd,0x04,0x15,0x7c
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, src_scc, v2.h ; encoding: [0xfd,0x04,0x15,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, src_scc, v2.h    ; encoding: [0xfd,0x04,0x15,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x15,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x15,0x7c]
+
+0xff,0xfe,0x15,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x15,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, 0xfe0b, v127.h   ; encoding: [0xff,0xfe,0x15,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x15,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x15,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0x34,0x7c
 # W32: v_cmp_nlg_f32_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x34,0x7c]
@@ -5221,64 +5941,124 @@
 # W64: v_cmp_nlg_f64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x55,0x7c,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x1c,0x7c
-# W32: v_cmp_nlt_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x1c,0x7c]
-# W64: v_cmp_nlt_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x1c,0x7c]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, v1.l, v2.l    ; encoding: [0x01,0x05,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, v1.l, v2.l       ; encoding: [0x01,0x05,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x1c,0x7c]
 
 0x7f,0x05,0x1c,0x7c
-# W32: v_cmp_nlt_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x1c,0x7c]
-# W64: v_cmp_nlt_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x1c,0x7c]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, v127.l, v2.l  ; encoding: [0x7f,0x05,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, v127.l, v2.l     ; encoding: [0x7f,0x05,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x1c,0x7c]
 
 0x01,0x04,0x1c,0x7c
-# W32: v_cmp_nlt_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x1c,0x7c]
-# W64: v_cmp_nlt_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x1c,0x7c]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, s1, v2.l      ; encoding: [0x01,0x04,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, s1, v2.l         ; encoding: [0x01,0x04,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x1c,0x7c]
 
 0x69,0x04,0x1c,0x7c
-# W32: v_cmp_nlt_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x1c,0x7c]
-# W64: v_cmp_nlt_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x1c,0x7c]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, s105, v2.l    ; encoding: [0x69,0x04,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, s105, v2.l       ; encoding: [0x69,0x04,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x1c,0x7c]
 
 0x6a,0x04,0x1c,0x7c
-# W32: v_cmp_nlt_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x1c,0x7c]
-# W64: v_cmp_nlt_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x1c,0x7c]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, vcc_lo, v2.l  ; encoding: [0x6a,0x04,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, vcc_lo, v2.l     ; encoding: [0x6a,0x04,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x1c,0x7c]
 
 0x6b,0x04,0x1c,0x7c
-# W32: v_cmp_nlt_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x1c,0x7c]
-# W64: v_cmp_nlt_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x1c,0x7c]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, vcc_hi, v2.l  ; encoding: [0x6b,0x04,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, vcc_hi, v2.l     ; encoding: [0x6b,0x04,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x1c,0x7c]
 
 0x7b,0x04,0x1c,0x7c
-# W32: v_cmp_nlt_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x1c,0x7c]
-# W64: v_cmp_nlt_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x1c,0x7c]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, ttmp15, v2.l  ; encoding: [0x7b,0x04,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, ttmp15, v2.l     ; encoding: [0x7b,0x04,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x1c,0x7c]
 
 0x7d,0x04,0x1c,0x7c
-# W32: v_cmp_nlt_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x1c,0x7c]
-# W64: v_cmp_nlt_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x1c,0x7c]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, m0, v2.l      ; encoding: [0x7d,0x04,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, m0, v2.l         ; encoding: [0x7d,0x04,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x1c,0x7c]
 
 0x7e,0x04,0x1c,0x7c
-# W32: v_cmp_nlt_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x1c,0x7c]
-# W64: v_cmp_nlt_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x1c,0x7c]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, exec_lo, v2.l ; encoding: [0x7e,0x04,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, exec_lo, v2.l    ; encoding: [0x7e,0x04,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x1c,0x7c]
 
 0x7f,0x04,0x1c,0x7c
-# W32: v_cmp_nlt_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x1c,0x7c]
-# W64: v_cmp_nlt_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x1c,0x7c]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, exec_hi, v2.l ; encoding: [0x7f,0x04,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, exec_hi, v2.l    ; encoding: [0x7f,0x04,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x1c,0x7c]
 
 0x7c,0x04,0x1c,0x7c
-# W32: v_cmp_nlt_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x1c,0x7c]
-# W64: v_cmp_nlt_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x1c,0x7c]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, null, v2.l    ; encoding: [0x7c,0x04,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, null, v2.l       ; encoding: [0x7c,0x04,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x1c,0x7c]
 
 0xc1,0x04,0x1c,0x7c
-# W32: v_cmp_nlt_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x1c,0x7c]
-# W64: v_cmp_nlt_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x1c,0x7c]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, -1, v2.l      ; encoding: [0xc1,0x04,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, -1, v2.l         ; encoding: [0xc1,0x04,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x1c,0x7c]
 
 0xf0,0x04,0x1c,0x7c
-# W32: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x1c,0x7c]
-# W64: v_cmp_nlt_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x1c,0x7c]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v2.l     ; encoding: [0xf0,0x04,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, 0.5, v2.l        ; encoding: [0xf0,0x04,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x1c,0x7c]
 
 0xfd,0x04,0x1c,0x7c
-# W32: v_cmp_nlt_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x1c,0x7c]
-# W64: v_cmp_nlt_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x1c,0x7c]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, src_scc, v2.l ; encoding: [0xfd,0x04,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, src_scc, v2.l    ; encoding: [0xfd,0x04,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x1c,0x7c]
 
 0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_nlt_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_nlt_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, 0xfe0b, v127.l   ; encoding: [0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x1c,0x7c
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, v1.h, v2.l    ; encoding: [0x81,0x05,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, v1.h, v2.l       ; encoding: [0x81,0x05,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x1c,0x7c]
+
+0xff,0x05,0x1c,0x7c
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, v127.h, v2.l  ; encoding: [0xff,0x05,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, v127.h, v2.l     ; encoding: [0xff,0x05,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x1c,0x7c]
+
+0xf0,0xfe,0x1c,0x7c
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v127.l   ; encoding: [0xf0,0xfe,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, 0.5, v127.l      ; encoding: [0xf0,0xfe,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v127     ; encoding: [0xf0,0xfe,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, 0.5, v127        ; encoding: [0xf0,0xfe,0x1c,0x7c]
+
+0xfd,0x04,0x1d,0x7c
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, src_scc, v2.h ; encoding: [0xfd,0x04,0x1d,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, src_scc, v2.h    ; encoding: [0xfd,0x04,0x1d,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x1d,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x1d,0x7c]
+
+0xff,0xfe,0x1d,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x1d,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, 0xfe0b, v127.h   ; encoding: [0xff,0xfe,0x1d,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x1d,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x1d,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0x3c,0x7c
 # W32: v_cmp_nlt_f32_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x3c,0x7c]
@@ -5389,64 +6169,124 @@
 # W64: v_cmp_nlt_f64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x5d,0x7c,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x0e,0x7c
-# W32: v_cmp_o_f16_e32 vcc_lo, v1, v2          ; encoding: [0x01,0x05,0x0e,0x7c]
-# W64: v_cmp_o_f16_e32 vcc, v1, v2             ; encoding: [0x01,0x05,0x0e,0x7c]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, v1.l, v2.l      ; encoding: [0x01,0x05,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, v1.l, v2.l         ; encoding: [0x01,0x05,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, v1, v2          ; encoding: [0x01,0x05,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, v1, v2             ; encoding: [0x01,0x05,0x0e,0x7c]
 
 0x7f,0x05,0x0e,0x7c
-# W32: v_cmp_o_f16_e32 vcc_lo, v127, v2        ; encoding: [0x7f,0x05,0x0e,0x7c]
-# W64: v_cmp_o_f16_e32 vcc, v127, v2           ; encoding: [0x7f,0x05,0x0e,0x7c]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, v127.l, v2.l    ; encoding: [0x7f,0x05,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, v127.l, v2.l       ; encoding: [0x7f,0x05,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, v127, v2        ; encoding: [0x7f,0x05,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, v127, v2           ; encoding: [0x7f,0x05,0x0e,0x7c]
 
 0x01,0x04,0x0e,0x7c
-# W32: v_cmp_o_f16_e32 vcc_lo, s1, v2          ; encoding: [0x01,0x04,0x0e,0x7c]
-# W64: v_cmp_o_f16_e32 vcc, s1, v2             ; encoding: [0x01,0x04,0x0e,0x7c]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, s1, v2.l        ; encoding: [0x01,0x04,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, s1, v2.l           ; encoding: [0x01,0x04,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, s1, v2          ; encoding: [0x01,0x04,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, s1, v2             ; encoding: [0x01,0x04,0x0e,0x7c]
 
 0x69,0x04,0x0e,0x7c
-# W32: v_cmp_o_f16_e32 vcc_lo, s105, v2        ; encoding: [0x69,0x04,0x0e,0x7c]
-# W64: v_cmp_o_f16_e32 vcc, s105, v2           ; encoding: [0x69,0x04,0x0e,0x7c]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, s105, v2.l      ; encoding: [0x69,0x04,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, s105, v2.l         ; encoding: [0x69,0x04,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, s105, v2        ; encoding: [0x69,0x04,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, s105, v2           ; encoding: [0x69,0x04,0x0e,0x7c]
 
 0x6a,0x04,0x0e,0x7c
-# W32: v_cmp_o_f16_e32 vcc_lo, vcc_lo, v2      ; encoding: [0x6a,0x04,0x0e,0x7c]
-# W64: v_cmp_o_f16_e32 vcc, vcc_lo, v2         ; encoding: [0x6a,0x04,0x0e,0x7c]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, vcc_lo, v2.l    ; encoding: [0x6a,0x04,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, vcc_lo, v2.l       ; encoding: [0x6a,0x04,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, vcc_lo, v2      ; encoding: [0x6a,0x04,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, vcc_lo, v2         ; encoding: [0x6a,0x04,0x0e,0x7c]
 
 0x6b,0x04,0x0e,0x7c
-# W32: v_cmp_o_f16_e32 vcc_lo, vcc_hi, v2      ; encoding: [0x6b,0x04,0x0e,0x7c]
-# W64: v_cmp_o_f16_e32 vcc, vcc_hi, v2         ; encoding: [0x6b,0x04,0x0e,0x7c]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, vcc_hi, v2.l    ; encoding: [0x6b,0x04,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, vcc_hi, v2.l       ; encoding: [0x6b,0x04,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, vcc_hi, v2      ; encoding: [0x6b,0x04,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, vcc_hi, v2         ; encoding: [0x6b,0x04,0x0e,0x7c]
 
 0x7b,0x04,0x0e,0x7c
-# W32: v_cmp_o_f16_e32 vcc_lo, ttmp15, v2      ; encoding: [0x7b,0x04,0x0e,0x7c]
-# W64: v_cmp_o_f16_e32 vcc, ttmp15, v2         ; encoding: [0x7b,0x04,0x0e,0x7c]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, ttmp15, v2.l    ; encoding: [0x7b,0x04,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, ttmp15, v2.l       ; encoding: [0x7b,0x04,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, ttmp15, v2      ; encoding: [0x7b,0x04,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, ttmp15, v2         ; encoding: [0x7b,0x04,0x0e,0x7c]
 
 0x7d,0x04,0x0e,0x7c
-# W32: v_cmp_o_f16_e32 vcc_lo, m0, v2          ; encoding: [0x7d,0x04,0x0e,0x7c]
-# W64: v_cmp_o_f16_e32 vcc, m0, v2             ; encoding: [0x7d,0x04,0x0e,0x7c]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, m0, v2.l        ; encoding: [0x7d,0x04,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, m0, v2.l           ; encoding: [0x7d,0x04,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, m0, v2          ; encoding: [0x7d,0x04,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, m0, v2             ; encoding: [0x7d,0x04,0x0e,0x7c]
 
 0x7e,0x04,0x0e,0x7c
-# W32: v_cmp_o_f16_e32 vcc_lo, exec_lo, v2     ; encoding: [0x7e,0x04,0x0e,0x7c]
-# W64: v_cmp_o_f16_e32 vcc, exec_lo, v2        ; encoding: [0x7e,0x04,0x0e,0x7c]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, exec_lo, v2.l   ; encoding: [0x7e,0x04,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, exec_lo, v2.l      ; encoding: [0x7e,0x04,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, exec_lo, v2     ; encoding: [0x7e,0x04,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, exec_lo, v2        ; encoding: [0x7e,0x04,0x0e,0x7c]
 
 0x7f,0x04,0x0e,0x7c
-# W32: v_cmp_o_f16_e32 vcc_lo, exec_hi, v2     ; encoding: [0x7f,0x04,0x0e,0x7c]
-# W64: v_cmp_o_f16_e32 vcc, exec_hi, v2        ; encoding: [0x7f,0x04,0x0e,0x7c]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, exec_hi, v2.l   ; encoding: [0x7f,0x04,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, exec_hi, v2.l      ; encoding: [0x7f,0x04,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, exec_hi, v2     ; encoding: [0x7f,0x04,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, exec_hi, v2        ; encoding: [0x7f,0x04,0x0e,0x7c]
 
 0x7c,0x04,0x0e,0x7c
-# W32: v_cmp_o_f16_e32 vcc_lo, null, v2        ; encoding: [0x7c,0x04,0x0e,0x7c]
-# W64: v_cmp_o_f16_e32 vcc, null, v2           ; encoding: [0x7c,0x04,0x0e,0x7c]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, null, v2.l      ; encoding: [0x7c,0x04,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, null, v2.l         ; encoding: [0x7c,0x04,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, null, v2        ; encoding: [0x7c,0x04,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, null, v2           ; encoding: [0x7c,0x04,0x0e,0x7c]
 
 0xc1,0x04,0x0e,0x7c
-# W32: v_cmp_o_f16_e32 vcc_lo, -1, v2          ; encoding: [0xc1,0x04,0x0e,0x7c]
-# W64: v_cmp_o_f16_e32 vcc, -1, v2             ; encoding: [0xc1,0x04,0x0e,0x7c]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, -1, v2.l        ; encoding: [0xc1,0x04,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, -1, v2.l           ; encoding: [0xc1,0x04,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, -1, v2          ; encoding: [0xc1,0x04,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, -1, v2             ; encoding: [0xc1,0x04,0x0e,0x7c]
 
 0xf0,0x04,0x0e,0x7c
-# W32: v_cmp_o_f16_e32 vcc_lo, 0.5, v2         ; encoding: [0xf0,0x04,0x0e,0x7c]
-# W64: v_cmp_o_f16_e32 vcc, 0.5, v2            ; encoding: [0xf0,0x04,0x0e,0x7c]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, 0.5, v2.l       ; encoding: [0xf0,0x04,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, 0.5, v2.l          ; encoding: [0xf0,0x04,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, 0.5, v2         ; encoding: [0xf0,0x04,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, 0.5, v2            ; encoding: [0xf0,0x04,0x0e,0x7c]
 
 0xfd,0x04,0x0e,0x7c
-# W32: v_cmp_o_f16_e32 vcc_lo, src_scc, v2     ; encoding: [0xfd,0x04,0x0e,0x7c]
-# W64: v_cmp_o_f16_e32 vcc, src_scc, v2        ; encoding: [0xfd,0x04,0x0e,0x7c]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, src_scc, v2.l   ; encoding: [0xfd,0x04,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, src_scc, v2.l      ; encoding: [0xfd,0x04,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, src_scc, v2     ; encoding: [0xfd,0x04,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, src_scc, v2        ; encoding: [0xfd,0x04,0x0e,0x7c]
 
 0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_o_f16_e32 vcc_lo, 0xfe0b, v127    ; encoding: [0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_o_f16_e32 vcc, 0xfe0b, v127       ; encoding: [0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, 0xfe0b, v127.l  ; encoding: [0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, 0xfe0b, v127.l     ; encoding: [0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, 0xfe0b, v127    ; encoding: [0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, 0xfe0b, v127       ; encoding: [0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x0e,0x7c
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, v1.h, v2.l      ; encoding: [0x81,0x05,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, v1.h, v2.l         ; encoding: [0x81,0x05,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x0e,0x7c]
+
+0xff,0x05,0x0e,0x7c
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, v127.h, v2.l    ; encoding: [0xff,0x05,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, v127.h, v2.l       ; encoding: [0xff,0x05,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x0e,0x7c]
+
+0xf0,0xfe,0x0e,0x7c
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, 0.5, v127.l     ; encoding: [0xf0,0xfe,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, 0.5, v127.l        ; encoding: [0xf0,0xfe,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, 0.5, v127       ; encoding: [0xf0,0xfe,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, 0.5, v127          ; encoding: [0xf0,0xfe,0x0e,0x7c]
+
+0xfd,0x04,0x0f,0x7c
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, src_scc, v2.h   ; encoding: [0xfd,0x04,0x0f,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, src_scc, v2.h      ; encoding: [0xfd,0x04,0x0f,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x0f,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x0f,0x7c]
+
+0xff,0xfe,0x0f,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, 0xfe0b, v127.h  ; encoding: [0xff,0xfe,0x0f,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, 0xfe0b, v127.h     ; encoding: [0xff,0xfe,0x0f,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x0f,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x0f,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0x2e,0x7c
 # W32: v_cmp_o_f32_e32 vcc_lo, v1, v2          ; encoding: [0x01,0x05,0x2e,0x7c]
@@ -5557,64 +6397,124 @@
 # W64: v_cmp_o_f64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x4f,0x7c,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x1e,0x7c
-# W32: v_cmp_t_f16_e32 vcc_lo, v1, v2          ; encoding: [0x01,0x05,0x1e,0x7c]
-# W64: v_cmp_t_f16_e32 vcc, v1, v2             ; encoding: [0x01,0x05,0x1e,0x7c]
+# W32-REAL16: v_cmp_t_f16_e32 vcc_lo, v1.l, v2.l      ; encoding: [0x01,0x05,0x1e,0x7c]
+# W64-REAL16: v_cmp_t_f16_e32 vcc, v1.l, v2.l         ; encoding: [0x01,0x05,0x1e,0x7c]
+# W32-FAKE16: v_cmp_t_f16_e32 vcc_lo, v1, v2          ; encoding: [0x01,0x05,0x1e,0x7c]
+# W64-FAKE16: v_cmp_t_f16_e32 vcc, v1, v2             ; encoding: [0x01,0x05,0x1e,0x7c]
 
 0x7f,0x05,0x1e,0x7c
-# W32: v_cmp_t_f16_e32 vcc_lo, v127, v2        ; encoding: [0x7f,0x05,0x1e,0x7c]
-# W64: v_cmp_t_f16_e32 vcc, v127, v2           ; encoding: [0x7f,0x05,0x1e,0x7c]
+# W32-REAL16: v_cmp_t_f16_e32 vcc_lo, v127.l, v2.l    ; encoding: [0x7f,0x05,0x1e,0x7c]
+# W64-REAL16: v_cmp_t_f16_e32 vcc, v127.l, v2.l       ; encoding: [0x7f,0x05,0x1e,0x7c]
+# W32-FAKE16: v_cmp_t_f16_e32 vcc_lo, v127, v2        ; encoding: [0x7f,0x05,0x1e,0x7c]
+# W64-FAKE16: v_cmp_t_f16_e32 vcc, v127, v2           ; encoding: [0x7f,0x05,0x1e,0x7c]
 
 0x01,0x04,0x1e,0x7c
-# W32: v_cmp_t_f16_e32 vcc_lo, s1, v2          ; encoding: [0x01,0x04,0x1e,0x7c]
-# W64: v_cmp_t_f16_e32 vcc, s1, v2             ; encoding: [0x01,0x04,0x1e,0x7c]
+# W32-REAL16: v_cmp_t_f16_e32 vcc_lo, s1, v2.l        ; encoding: [0x01,0x04,0x1e,0x7c]
+# W64-REAL16: v_cmp_t_f16_e32 vcc, s1, v2.l           ; encoding: [0x01,0x04,0x1e,0x7c]
+# W32-FAKE16: v_cmp_t_f16_e32 vcc_lo, s1, v2          ; encoding: [0x01,0x04,0x1e,0x7c]
+# W64-FAKE16: v_cmp_t_f16_e32 vcc, s1, v2             ; encoding: [0x01,0x04,0x1e,0x7c]
 
 0x69,0x04,0x1e,0x7c
-# W32: v_cmp_t_f16_e32 vcc_lo, s105, v2        ; encoding: [0x69,0x04,0x1e,0x7c]
-# W64: v_cmp_t_f16_e32 vcc, s105, v2           ; encoding: [0x69,0x04,0x1e,0x7c]
+# W32-REAL16: v_cmp_t_f16_e32 vcc_lo, s105, v2.l      ; encoding: [0x69,0x04,0x1e,0x7c]
+# W64-REAL16: v_cmp_t_f16_e32 vcc, s105, v2.l         ; encoding: [0x69,0x04,0x1e,0x7c]
+# W32-FAKE16: v_cmp_t_f16_e32 vcc_lo, s105, v2        ; encoding: [0x69,0x04,0x1e,0x7c]
+# W64-FAKE16: v_cmp_t_f16_e32 vcc, s105, v2           ; encoding: [0x69,0x04,0x1e,0x7c]
 
 0x6a,0x04,0x1e,0x7c
-# W32: v_cmp_t_f16_e32 vcc_lo, vcc_lo, v2      ; encoding: [0x6a,0x04,0x1e,0x7c]
-# W64: v_cmp_t_f16_e32 vcc, vcc_lo, v2         ; encoding: [0x6a,0x04,0x1e,0x7c]
+# W32-REAL16: v_cmp_t_f16_e32 vcc_lo, vcc_lo, v2.l    ; encoding: [0x6a,0x04,0x1e,0x7c]
+# W64-REAL16: v_cmp_t_f16_e32 vcc, vcc_lo, v2.l       ; encoding: [0x6a,0x04,0x1e,0x7c]
+# W32-FAKE16: v_cmp_t_f16_e32 vcc_lo, vcc_lo, v2      ; encoding: [0x6a,0x04,0x1e,0x7c]
+# W64-FAKE16: v_cmp_t_f16_e32 vcc, vcc_lo, v2         ; encoding: [0x6a,0x04,0x1e,0x7c]
 
 0x6b,0x04,0x1e,0x7c
-# W32: v_cmp_t_f16_e32 vcc_lo, vcc_hi, v2      ; encoding: [0x6b,0x04,0x1e,0x7c]
-# W64: v_cmp_t_f16_e32 vcc, vcc_hi, v2         ; encoding: [0x6b,0x04,0x1e,0x7c]
+# W32-REAL16: v_cmp_t_f16_e32 vcc_lo, vcc_hi, v2.l    ; encoding: [0x6b,0x04,0x1e,0x7c]
+# W64-REAL16: v_cmp_t_f16_e32 vcc, vcc_hi, v2.l       ; encoding: [0x6b,0x04,0x1e,0x7c]
+# W32-FAKE16: v_cmp_t_f16_e32 vcc_lo, vcc_hi, v2      ; encoding: [0x6b,0x04,0x1e,0x7c]
+# W64-FAKE16: v_cmp_t_f16_e32 vcc, vcc_hi, v2         ; encoding: [0x6b,0x04,0x1e,0x7c]
 
 0x7b,0x04,0x1e,0x7c
-# W32: v_cmp_t_f16_e32 vcc_lo, ttmp15, v2      ; encoding: [0x7b,0x04,0x1e,0x7c]
-# W64: v_cmp_t_f16_e32 vcc, ttmp15, v2         ; encoding: [0x7b,0x04,0x1e,0x7c]
+# W32-REAL16: v_cmp_t_f16_e32 vcc_lo, ttmp15, v2.l    ; encoding: [0x7b,0x04,0x1e,0x7c]
+# W64-REAL16: v_cmp_t_f16_e32 vcc, ttmp15, v2.l       ; encoding: [0x7b,0x04,0x1e,0x7c]
+# W32-FAKE16: v_cmp_t_f16_e32 vcc_lo, ttmp15, v2      ; encoding: [0x7b,0x04,0x1e,0x7c]
+# W64-FAKE16: v_cmp_t_f16_e32 vcc, ttmp15, v2         ; encoding: [0x7b,0x04,0x1e,0x7c]
 
 0x7d,0x04,0x1e,0x7c
-# W32: v_cmp_t_f16_e32 vcc_lo, m0, v2          ; encoding: [0x7d,0x04,0x1e,0x7c]
-# W64: v_cmp_t_f16_e32 vcc, m0, v2             ; encoding: [0x7d,0x04,0x1e,0x7c]
+# W32-REAL16: v_cmp_t_f16_e32 vcc_lo, m0, v2.l        ; encoding: [0x7d,0x04,0x1e,0x7c]
+# W64-REAL16: v_cmp_t_f16_e32 vcc, m0, v2.l           ; encoding: [0x7d,0x04,0x1e,0x7c]
+# W32-FAKE16: v_cmp_t_f16_e32 vcc_lo, m0, v2          ; encoding: [0x7d,0x04,0x1e,0x7c]
+# W64-FAKE16: v_cmp_t_f16_e32 vcc, m0, v2             ; encoding: [0x7d,0x04,0x1e,0x7c]
 
 0x7e,0x04,0x1e,0x7c
-# W32: v_cmp_t_f16_e32 vcc_lo, exec_lo, v2     ; encoding: [0x7e,0x04,0x1e,0x7c]
-# W64: v_cmp_t_f16_e32 vcc, exec_lo, v2        ; encoding: [0x7e,0x04,0x1e,0x7c]
+# W32-REAL16: v_cmp_t_f16_e32 vcc_lo, exec_lo, v2.l   ; encoding: [0x7e,0x04,0x1e,0x7c]
+# W64-REAL16: v_cmp_t_f16_e32 vcc, exec_lo, v2.l      ; encoding: [0x7e,0x04,0x1e,0x7c]
+# W32-FAKE16: v_cmp_t_f16_e32 vcc_lo, exec_lo, v2     ; encoding: [0x7e,0x04,0x1e,0x7c]
+# W64-FAKE16: v_cmp_t_f16_e32 vcc, exec_lo, v2        ; encoding: [0x7e,0x04,0x1e,0x7c]
 
 0x7f,0x04,0x1e,0x7c
-# W32: v_cmp_t_f16_e32 vcc_lo, exec_hi, v2     ; encoding: [0x7f,0x04,0x1e,0x7c]
-# W64: v_cmp_t_f16_e32 vcc, exec_hi, v2        ; encoding: [0x7f,0x04,0x1e,0x7c]
+# W32-REAL16: v_cmp_t_f16_e32 vcc_lo, exec_hi, v2.l   ; encoding: [0x7f,0x04,0x1e,0x7c]
+# W64-REAL16: v_cmp_t_f16_e32 vcc, exec_hi, v2.l      ; encoding: [0x7f,0x04,0x1e,0x7c]
+# W32-FAKE16: v_cmp_t_f16_e32 vcc_lo, exec_hi, v2     ; encoding: [0x7f,0x04,0x1e,0x7c]
+# W64-FAKE16: v_cmp_t_f16_e32 vcc, exec_hi, v2        ; encoding: [0x7f,0x04,0x1e,0x7c]
 
 0x7c,0x04,0x1e,0x7c
-# W32: v_cmp_t_f16_e32 vcc_lo, null, v2        ; encoding: [0x7c,0x04,0x1e,0x7c]
-# W64: v_cmp_t_f16_e32 vcc, null, v2           ; encoding: [0x7c,0x04,0x1e,0x7c]
+# W32-REAL16: v_cmp_t_f16_e32 vcc_lo, null, v2.l      ; encoding: [0x7c,0x04,0x1e,0x7c]
+# W64-REAL16: v_cmp_t_f16_e32 vcc, null, v2.l         ; encoding: [0x7c,0x04,0x1e,0x7c]
+# W32-FAKE16: v_cmp_t_f16_e32 vcc_lo, null, v2        ; encoding: [0x7c,0x04,0x1e,0x7c]
+# W64-FAKE16: v_cmp_t_f16_e32 vcc, null, v2           ; encoding: [0x7c,0x04,0x1e,0x7c]
 
 0xc1,0x04,0x1e,0x7c
-# W32: v_cmp_t_f16_e32 vcc_lo, -1, v2          ; encoding: [0xc1,0x04,0x1e,0x7c]
-# W64: v_cmp_t_f16_e32 vcc, -1, v2             ; encoding: [0xc1,0x04,0x1e,0x7c]
+# W32-REAL16: v_cmp_t_f16_e32 vcc_lo, -1, v2.l        ; encoding: [0xc1,0x04,0x1e,0x7c]
+# W64-REAL16: v_cmp_t_f16_e32 vcc, -1, v2.l           ; encoding: [0xc1,0x04,0x1e,0x7c]
+# W32-FAKE16: v_cmp_t_f16_e32 vcc_lo, -1, v2          ; encoding: [0xc1,0x04,0x1e,0x7c]
+# W64-FAKE16: v_cmp_t_f16_e32 vcc, -1, v2             ; encoding: [0xc1,0x04,0x1e,0x7c]
 
 0xf0,0x04,0x1e,0x7c
-# W32: v_cmp_t_f16_e32 vcc_lo, 0.5, v2         ; encoding: [0xf0,0x04,0x1e,0x7c]
-# W64: v_cmp_t_f16_e32 vcc, 0.5, v2            ; encoding: [0xf0,0x04,0x1e,0x7c]
+# W32-REAL16: v_cmp_t_f16_e32 vcc_lo, 0.5, v2.l       ; encoding: [0xf0,0x04,0x1e,0x7c]
+# W64-REAL16: v_cmp_t_f16_e32 vcc, 0.5, v2.l          ; encoding: [0xf0,0x04,0x1e,0x7c]
+# W32-FAKE16: v_cmp_t_f16_e32 vcc_lo, 0.5, v2         ; encoding: [0xf0,0x04,0x1e,0x7c]
+# W64-FAKE16: v_cmp_t_f16_e32 vcc, 0.5, v2            ; encoding: [0xf0,0x04,0x1e,0x7c]
 
 0xfd,0x04,0x1e,0x7c
-# W32: v_cmp_t_f16_e32 vcc_lo, src_scc, v2     ; encoding: [0xfd,0x04,0x1e,0x7c]
-# W64: v_cmp_t_f16_e32 vcc, src_scc, v2        ; encoding: [0xfd,0x04,0x1e,0x7c]
+# W32-REAL16: v_cmp_t_f16_e32 vcc_lo, src_scc, v2.l   ; encoding: [0xfd,0x04,0x1e,0x7c]
+# W64-REAL16: v_cmp_t_f16_e32 vcc, src_scc, v2.l      ; encoding: [0xfd,0x04,0x1e,0x7c]
+# W32-FAKE16: v_cmp_t_f16_e32 vcc_lo, src_scc, v2     ; encoding: [0xfd,0x04,0x1e,0x7c]
+# W64-FAKE16: v_cmp_t_f16_e32 vcc, src_scc, v2        ; encoding: [0xfd,0x04,0x1e,0x7c]
 
 0xff,0xfe,0x1e,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_t_f16_e32 vcc_lo, 0xfe0b, v127    ; encoding: [0xff,0xfe,0x1e,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_t_f16_e32 vcc, 0xfe0b, v127       ; encoding: [0xff,0xfe,0x1e,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_t_f16_e32 vcc_lo, 0xfe0b, v127.l  ; encoding: [0xff,0xfe,0x1e,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_t_f16_e32 vcc, 0xfe0b, v127.l     ; encoding: [0xff,0xfe,0x1e,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_t_f16_e32 vcc_lo, 0xfe0b, v127    ; encoding: [0xff,0xfe,0x1e,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_t_f16_e32 vcc, 0xfe0b, v127       ; encoding: [0xff,0xfe,0x1e,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x1e,0x7c
+# W32-REAL16: v_cmp_t_f16_e32 vcc_lo, v1.h, v2.l      ; encoding: [0x81,0x05,0x1e,0x7c]
+# W64-REAL16: v_cmp_t_f16_e32 vcc, v1.h, v2.l         ; encoding: [0x81,0x05,0x1e,0x7c]
+# W32-FAKE16: v_cmp_t_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x1e,0x7c]
+# W64-FAKE16: v_cmp_t_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x1e,0x7c]
+
+0xff,0x05,0x1e,0x7c
+# W32-REAL16: v_cmp_t_f16_e32 vcc_lo, v127.h, v2.l    ; encoding: [0xff,0x05,0x1e,0x7c]
+# W64-REAL16: v_cmp_t_f16_e32 vcc, v127.h, v2.l       ; encoding: [0xff,0x05,0x1e,0x7c]
+# W32-FAKE16: v_cmp_t_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x1e,0x7c]
+# W64-FAKE16: v_cmp_t_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x1e,0x7c]
+
+0xf0,0xfe,0x1e,0x7c
+# W32-REAL16: v_cmp_t_f16_e32 vcc_lo, 0.5, v127.l     ; encoding: [0xf0,0xfe,0x1e,0x7c]
+# W64-REAL16: v_cmp_t_f16_e32 vcc, 0.5, v127.l        ; encoding: [0xf0,0xfe,0x1e,0x7c]
+# W32-FAKE16: v_cmp_t_f16_e32 vcc_lo, 0.5, v127       ; encoding: [0xf0,0xfe,0x1e,0x7c]
+# W64-FAKE16: v_cmp_t_f16_e32 vcc, 0.5, v127          ; encoding: [0xf0,0xfe,0x1e,0x7c]
+
+0xfd,0x04,0x1f,0x7c
+# W32-REAL16: v_cmp_t_f16_e32 vcc_lo, src_scc, v2.h   ; encoding: [0xfd,0x04,0x1f,0x7c]
+# W64-REAL16: v_cmp_t_f16_e32 vcc, src_scc, v2.h      ; encoding: [0xfd,0x04,0x1f,0x7c]
+# W32-FAKE16: v_cmp_t_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x1f,0x7c]
+# W64-FAKE16: v_cmp_t_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x1f,0x7c]
+
+0xff,0xfe,0x1f,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_t_f16_e32 vcc_lo, 0xfe0b, v127.h  ; encoding: [0xff,0xfe,0x1f,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_t_f16_e32 vcc, 0xfe0b, v127.h     ; encoding: [0xff,0xfe,0x1f,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_t_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x1f,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_t_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x1f,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0x3e,0x7c
 # W32: v_cmp_t_f32_e32 vcc_lo, v1, v2          ; encoding: [0x01,0x05,0x3e,0x7c]
@@ -5941,64 +6841,124 @@
 # W64: v_cmp_t_u64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xbf,0x7c,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x10,0x7c
-# W32: v_cmp_u_f16_e32 vcc_lo, v1, v2          ; encoding: [0x01,0x05,0x10,0x7c]
-# W64: v_cmp_u_f16_e32 vcc, v1, v2             ; encoding: [0x01,0x05,0x10,0x7c]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, v1.l, v2.l      ; encoding: [0x01,0x05,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, v1.l, v2.l         ; encoding: [0x01,0x05,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, v1, v2          ; encoding: [0x01,0x05,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, v1, v2             ; encoding: [0x01,0x05,0x10,0x7c]
 
 0x7f,0x05,0x10,0x7c
-# W32: v_cmp_u_f16_e32 vcc_lo, v127, v2        ; encoding: [0x7f,0x05,0x10,0x7c]
-# W64: v_cmp_u_f16_e32 vcc, v127, v2           ; encoding: [0x7f,0x05,0x10,0x7c]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, v127.l, v2.l    ; encoding: [0x7f,0x05,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, v127.l, v2.l       ; encoding: [0x7f,0x05,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, v127, v2        ; encoding: [0x7f,0x05,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, v127, v2           ; encoding: [0x7f,0x05,0x10,0x7c]
 
 0x01,0x04,0x10,0x7c
-# W32: v_cmp_u_f16_e32 vcc_lo, s1, v2          ; encoding: [0x01,0x04,0x10,0x7c]
-# W64: v_cmp_u_f16_e32 vcc, s1, v2             ; encoding: [0x01,0x04,0x10,0x7c]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, s1, v2.l        ; encoding: [0x01,0x04,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, s1, v2.l           ; encoding: [0x01,0x04,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, s1, v2          ; encoding: [0x01,0x04,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, s1, v2             ; encoding: [0x01,0x04,0x10,0x7c]
 
 0x69,0x04,0x10,0x7c
-# W32: v_cmp_u_f16_e32 vcc_lo, s105, v2        ; encoding: [0x69,0x04,0x10,0x7c]
-# W64: v_cmp_u_f16_e32 vcc, s105, v2           ; encoding: [0x69,0x04,0x10,0x7c]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, s105, v2.l      ; encoding: [0x69,0x04,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, s105, v2.l         ; encoding: [0x69,0x04,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, s105, v2        ; encoding: [0x69,0x04,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, s105, v2           ; encoding: [0x69,0x04,0x10,0x7c]
 
 0x6a,0x04,0x10,0x7c
-# W32: v_cmp_u_f16_e32 vcc_lo, vcc_lo, v2      ; encoding: [0x6a,0x04,0x10,0x7c]
-# W64: v_cmp_u_f16_e32 vcc, vcc_lo, v2         ; encoding: [0x6a,0x04,0x10,0x7c]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, vcc_lo, v2.l    ; encoding: [0x6a,0x04,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, vcc_lo, v2.l       ; encoding: [0x6a,0x04,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, vcc_lo, v2      ; encoding: [0x6a,0x04,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, vcc_lo, v2         ; encoding: [0x6a,0x04,0x10,0x7c]
 
 0x6b,0x04,0x10,0x7c
-# W32: v_cmp_u_f16_e32 vcc_lo, vcc_hi, v2      ; encoding: [0x6b,0x04,0x10,0x7c]
-# W64: v_cmp_u_f16_e32 vcc, vcc_hi, v2         ; encoding: [0x6b,0x04,0x10,0x7c]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, vcc_hi, v2.l    ; encoding: [0x6b,0x04,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, vcc_hi, v2.l       ; encoding: [0x6b,0x04,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, vcc_hi, v2      ; encoding: [0x6b,0x04,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, vcc_hi, v2         ; encoding: [0x6b,0x04,0x10,0x7c]
 
 0x7b,0x04,0x10,0x7c
-# W32: v_cmp_u_f16_e32 vcc_lo, ttmp15, v2      ; encoding: [0x7b,0x04,0x10,0x7c]
-# W64: v_cmp_u_f16_e32 vcc, ttmp15, v2         ; encoding: [0x7b,0x04,0x10,0x7c]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, ttmp15, v2.l    ; encoding: [0x7b,0x04,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, ttmp15, v2.l       ; encoding: [0x7b,0x04,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, ttmp15, v2      ; encoding: [0x7b,0x04,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, ttmp15, v2         ; encoding: [0x7b,0x04,0x10,0x7c]
 
 0x7d,0x04,0x10,0x7c
-# W32: v_cmp_u_f16_e32 vcc_lo, m0, v2          ; encoding: [0x7d,0x04,0x10,0x7c]
-# W64: v_cmp_u_f16_e32 vcc, m0, v2             ; encoding: [0x7d,0x04,0x10,0x7c]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, m0, v2.l        ; encoding: [0x7d,0x04,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, m0, v2.l           ; encoding: [0x7d,0x04,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, m0, v2          ; encoding: [0x7d,0x04,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, m0, v2             ; encoding: [0x7d,0x04,0x10,0x7c]
 
 0x7e,0x04,0x10,0x7c
-# W32: v_cmp_u_f16_e32 vcc_lo, exec_lo, v2     ; encoding: [0x7e,0x04,0x10,0x7c]
-# W64: v_cmp_u_f16_e32 vcc, exec_lo, v2        ; encoding: [0x7e,0x04,0x10,0x7c]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, exec_lo, v2.l   ; encoding: [0x7e,0x04,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, exec_lo, v2.l      ; encoding: [0x7e,0x04,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, exec_lo, v2     ; encoding: [0x7e,0x04,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, exec_lo, v2        ; encoding: [0x7e,0x04,0x10,0x7c]
 
 0x7f,0x04,0x10,0x7c
-# W32: v_cmp_u_f16_e32 vcc_lo, exec_hi, v2     ; encoding: [0x7f,0x04,0x10,0x7c]
-# W64: v_cmp_u_f16_e32 vcc, exec_hi, v2        ; encoding: [0x7f,0x04,0x10,0x7c]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, exec_hi, v2.l   ; encoding: [0x7f,0x04,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, exec_hi, v2.l      ; encoding: [0x7f,0x04,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, exec_hi, v2     ; encoding: [0x7f,0x04,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, exec_hi, v2        ; encoding: [0x7f,0x04,0x10,0x7c]
 
 0x7c,0x04,0x10,0x7c
-# W32: v_cmp_u_f16_e32 vcc_lo, null, v2        ; encoding: [0x7c,0x04,0x10,0x7c]
-# W64: v_cmp_u_f16_e32 vcc, null, v2           ; encoding: [0x7c,0x04,0x10,0x7c]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, null, v2.l      ; encoding: [0x7c,0x04,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, null, v2.l         ; encoding: [0x7c,0x04,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, null, v2        ; encoding: [0x7c,0x04,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, null, v2           ; encoding: [0x7c,0x04,0x10,0x7c]
 
 0xc1,0x04,0x10,0x7c
-# W32: v_cmp_u_f16_e32 vcc_lo, -1, v2          ; encoding: [0xc1,0x04,0x10,0x7c]
-# W64: v_cmp_u_f16_e32 vcc, -1, v2             ; encoding: [0xc1,0x04,0x10,0x7c]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, -1, v2.l        ; encoding: [0xc1,0x04,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, -1, v2.l           ; encoding: [0xc1,0x04,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, -1, v2          ; encoding: [0xc1,0x04,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, -1, v2             ; encoding: [0xc1,0x04,0x10,0x7c]
 
 0xf0,0x04,0x10,0x7c
-# W32: v_cmp_u_f16_e32 vcc_lo, 0.5, v2         ; encoding: [0xf0,0x04,0x10,0x7c]
-# W64: v_cmp_u_f16_e32 vcc, 0.5, v2            ; encoding: [0xf0,0x04,0x10,0x7c]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, 0.5, v2.l       ; encoding: [0xf0,0x04,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, 0.5, v2.l          ; encoding: [0xf0,0x04,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, 0.5, v2         ; encoding: [0xf0,0x04,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, 0.5, v2            ; encoding: [0xf0,0x04,0x10,0x7c]
 
 0xfd,0x04,0x10,0x7c
-# W32: v_cmp_u_f16_e32 vcc_lo, src_scc, v2     ; encoding: [0xfd,0x04,0x10,0x7c]
-# W64: v_cmp_u_f16_e32 vcc, src_scc, v2        ; encoding: [0xfd,0x04,0x10,0x7c]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, src_scc, v2.l   ; encoding: [0xfd,0x04,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, src_scc, v2.l      ; encoding: [0xfd,0x04,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, src_scc, v2     ; encoding: [0xfd,0x04,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, src_scc, v2        ; encoding: [0xfd,0x04,0x10,0x7c]
 
 0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_u_f16_e32 vcc_lo, 0xfe0b, v127    ; encoding: [0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_u_f16_e32 vcc, 0xfe0b, v127       ; encoding: [0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, 0xfe0b, v127.l  ; encoding: [0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, 0xfe0b, v127.l     ; encoding: [0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, 0xfe0b, v127    ; encoding: [0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, 0xfe0b, v127       ; encoding: [0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x10,0x7c
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, v1.h, v2.l      ; encoding: [0x81,0x05,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, v1.h, v2.l         ; encoding: [0x81,0x05,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x10,0x7c]
+
+0xff,0x05,0x10,0x7c
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, v127.h, v2.l    ; encoding: [0xff,0x05,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, v127.h, v2.l       ; encoding: [0xff,0x05,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x10,0x7c]
+
+0xf0,0xfe,0x10,0x7c
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, 0.5, v127.l     ; encoding: [0xf0,0xfe,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, 0.5, v127.l        ; encoding: [0xf0,0xfe,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, 0.5, v127       ; encoding: [0xf0,0xfe,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, 0.5, v127          ; encoding: [0xf0,0xfe,0x10,0x7c]
+
+0xfd,0x04,0x11,0x7c
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, src_scc, v2.h   ; encoding: [0xfd,0x04,0x11,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, src_scc, v2.h      ; encoding: [0xfd,0x04,0x11,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x11,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x11,0x7c]
+
+0xff,0xfe,0x11,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, 0xfe0b, v127.h  ; encoding: [0xff,0xfe,0x11,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, 0xfe0b, v127.h     ; encoding: [0xff,0xfe,0x11,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x11,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x11,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0x30,0x7c
 # W32: v_cmp_u_f32_e32 vcc_lo, v1, v2          ; encoding: [0x01,0x05,0x30,0x7c]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc_dpp16.txt
index 7983c2258599a..919f21299cc0d 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc_dpp16.txt
@@ -5,60 +5,106 @@
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=W64,W64-FAKE16
 
 0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_class_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_class_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_class_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_class_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_class_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_class_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_class_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_class_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_class_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_class_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_class_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_class_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_class_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_class_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_class_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_class_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_class_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_class_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_class_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_class_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_class_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_class_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_class_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_class_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_class_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_class_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_class_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0xfa,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_class_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_class_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_class_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x3d,0x30
-# W32: v_cmp_class_f16 vcc_lo, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x3d,0x30]
-# W64: v_cmp_class_f16 vcc, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x3d,0x30]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, -|v127.l|, v127.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x3d,0x30]
+# W64-REAL16: v_cmp_class_f16 vcc, -|v127.l|, v127.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x3d,0x30]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x3d,0x30]
+# W64-FAKE16: v_cmp_class_f16 vcc, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x3d,0x30]
+
+0xfa,0xfe,0xfa,0x7c,0x7f,0x5f,0x01,0x01
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_class_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_class_f16 vcc, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x5f,0x01,0x01]
+
+0xfa,0x04,0xfb,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0xfb,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0xfb,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0xfb,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_class_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0xfb,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0xfb,0x7c,0xff,0x6f,0x3d,0x30
+# W32-REAL16: v_cmp_class_f16 vcc_lo, -|v127.h|, v127.h row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfb,0x7c,0xff,0x6f,0x3d,0x30]
+# W64-REAL16: v_cmp_class_f16 vcc, -|v127.h|, v127.h row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfb,0x7c,0xff,0x6f,0x3d,0x30]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfb,0x7c,0xff,0x6f,0x3d,0x30]
+# W64-FAKE16: v_cmp_class_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfb,0x7c,0xff,0x6f,0x3d,0x30]
 
 0xfa,0x04,0xfc,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_class_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x1b,0x00,0xff]
@@ -117,60 +163,106 @@
 # W64: v_cmp_class_f32 vcc, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfd,0x7c,0xff,0x6f,0x3d,0x30]
 
 0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_eq_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_eq_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_eq_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_eq_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_eq_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_eq_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_eq_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_eq_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_eq_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_eq_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_eq_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_eq_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_eq_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_eq_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0x04,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_eq_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xfd,0x30
-# W32: v_cmp_eq_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xfd,0x30]
-# W64: v_cmp_eq_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_eq_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_eq_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xfd,0x30]
+
+0xfa,0xfe,0x04,0x7c,0x7f,0x5f,0x01,0x01
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_eq_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x5f,0x01,0x01]
+
+0xfa,0x04,0x05,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x05,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x05,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x05,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x05,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0x05,0x7c,0xff,0x6f,0xfd,0x30
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x05,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_eq_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x05,0x7c,0xff,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x05,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_eq_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x05,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x24,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_eq_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x24,0x7c,0x01,0x1b,0x00,0xff]
@@ -545,60 +637,106 @@
 # W64: v_cmp_eq_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x95,0x7c,0xff,0x6f,0x0d,0x30]
 
 0xfa,0x04,0x00,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_f_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_f_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_f_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_f_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_f_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_f_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0x00,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_f_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_f_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_f_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_f_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_f_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_f_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0x00,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_f_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_f_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_f_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_f_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_f_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0x00,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_f_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_f_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_f_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_f_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_f_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0x00,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_f_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_f_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_f_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_f_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_f_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0x00,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_f_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_f_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_f_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_f_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_f_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0x00,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_f_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_f_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_f_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_f_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_f_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0x00,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_f_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_f_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_f_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_f_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_f_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0x00,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_f_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_f_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_f_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_f_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_f_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0x00,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_f_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_f_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_f_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_f_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_f_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0x00,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_f_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_f_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_f_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_f_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_f_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0x00,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_f_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_f_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_f_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_f_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_f_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0x00,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_f_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_f_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_f_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_f_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_f_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_f_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x00,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0x00,0x7c,0x7f,0x6f,0xfd,0x30
-# W32: v_cmp_f_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x00,0x7c,0x7f,0x6f,0xfd,0x30]
-# W64: v_cmp_f_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x00,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-REAL16: v_cmp_f_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x00,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_f_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x00,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_f_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x00,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_f_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x00,0x7c,0x7f,0x6f,0xfd,0x30]
+
+0xfa,0xfe,0x00,0x7c,0x7f,0x5f,0x01,0x01
+# W32-REAL16: v_cmp_f_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x00,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_f_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x00,0x7c,0x7f,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_f_f16 vcc_lo, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x00,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_f_f16 vcc, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x00,0x7c,0x7f,0x5f,0x01,0x01]
+
+0xfa,0x04,0x01,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_f_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x01,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_f_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x01,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_f_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x01,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_f_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x01,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0x01,0x7c,0xff,0x6f,0xfd,0x30
+# W32-REAL16: v_cmp_f_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x01,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_f_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x01,0x7c,0xff,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_f_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x01,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_f_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x01,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x20,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_f_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x20,0x7c,0x01,0x1b,0x00,0xff]
@@ -769,60 +907,106 @@
 # W64: v_cmp_f_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x91,0x7c,0xff,0x6f,0x0d,0x30]
 
 0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_ge_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_ge_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_ge_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_ge_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_ge_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_ge_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_ge_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_ge_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_ge_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_ge_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_ge_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_ge_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_ge_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_ge_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0x0c,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_ge_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xfd,0x30
-# W32: v_cmp_ge_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xfd,0x30]
-# W64: v_cmp_ge_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_ge_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_ge_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xfd,0x30]
+
+0xfa,0xfe,0x0c,0x7c,0x7f,0x5f,0x01,0x01
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_ge_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x5f,0x01,0x01]
+
+0xfa,0x04,0x0d,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0d,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0d,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0d,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0d,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0x0d,0x7c,0xff,0x6f,0xfd,0x30
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0d,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_ge_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0d,0x7c,0xff,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0d,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_ge_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0d,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x2c,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_ge_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x1b,0x00,0xff]
@@ -1197,60 +1381,106 @@
 # W64: v_cmp_ge_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x9d,0x7c,0xff,0x6f,0x0d,0x30]
 
 0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_gt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_gt_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_gt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_gt_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_gt_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_gt_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_gt_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_gt_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_gt_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_gt_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_gt_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_gt_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_gt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_gt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0x08,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_gt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xfd,0x30
-# W32: v_cmp_gt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xfd,0x30]
-# W64: v_cmp_gt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_gt_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_gt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xfd,0x30]
+
+0xfa,0xfe,0x08,0x7c,0x7f,0x5f,0x01,0x01
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_gt_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x5f,0x01,0x01]
+
+0xfa,0x04,0x09,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x09,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x09,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x09,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x09,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0x09,0x7c,0xff,0x6f,0xfd,0x30
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x09,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_gt_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x09,0x7c,0xff,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x09,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_gt_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x09,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x28,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_gt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0x1b,0x00,0xff]
@@ -1625,60 +1855,106 @@
 # W64: v_cmp_gt_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x99,0x7c,0xff,0x6f,0x0d,0x30]
 
 0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_le_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_le_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_le_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_le_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_le_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_le_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_le_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_le_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_le_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_le_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_le_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_le_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_le_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_le_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_le_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_le_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_le_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_le_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_le_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_le_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_le_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_le_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_le_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_le_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_le_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_le_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_le_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0x06,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_le_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_le_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_le_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xfd,0x30
-# W32: v_cmp_le_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xfd,0x30]
-# W64: v_cmp_le_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_le_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_le_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xfd,0x30]
+
+0xfa,0xfe,0x06,0x7c,0x7f,0x5f,0x01,0x01
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_le_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_le_f16 vcc, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x5f,0x01,0x01]
+
+0xfa,0x04,0x07,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x07,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x07,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x07,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_le_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x07,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0x07,0x7c,0xff,0x6f,0xfd,0x30
+# W32-REAL16: v_cmp_le_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x07,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_le_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x07,0x7c,0xff,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x07,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_le_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x07,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x26,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_le_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x26,0x7c,0x01,0x1b,0x00,0xff]
@@ -2053,60 +2329,106 @@
 # W64: v_cmp_le_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x97,0x7c,0xff,0x6f,0x0d,0x30]
 
 0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_lg_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_lg_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_lg_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_lg_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_lg_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_lg_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_lg_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_lg_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_lg_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_lg_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_lg_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_lg_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_lg_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_lg_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0x0a,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_lg_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xfd,0x30
-# W32: v_cmp_lg_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xfd,0x30]
-# W64: v_cmp_lg_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_lg_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_lg_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xfd,0x30]
+
+0xfa,0xfe,0x0a,0x7c,0x7f,0x5f,0x01,0x01
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_lg_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x5f,0x01,0x01]
+
+0xfa,0x04,0x0b,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0b,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0b,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0b,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0b,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0x0b,0x7c,0xff,0x6f,0xfd,0x30
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0b,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_lg_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0b,0x7c,0xff,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0b,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_lg_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0b,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x2a,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_lg_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x1b,0x00,0xff]
@@ -2955,60 +3277,106 @@
 # W64: v_cmp_ne_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x9b,0x7c,0xff,0x6f,0x0d,0x30]
 
 0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_neq_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_neq_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_neq_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_neq_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_neq_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_neq_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_neq_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_neq_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_neq_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_neq_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_neq_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_neq_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_neq_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_neq_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0x1a,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_neq_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xfd,0x30
-# W32: v_cmp_neq_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xfd,0x30]
-# W64: v_cmp_neq_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_neq_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_neq_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xfd,0x30]
+
+0xfa,0xfe,0x1a,0x7c,0x7f,0x5f,0x01,0x01
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_neq_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x5f,0x01,0x01]
+
+0xfa,0x04,0x1b,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1b,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1b,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1b,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1b,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0x1b,0x7c,0xff,0x6f,0xfd,0x30
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1b,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_neq_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1b,0x7c,0xff,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1b,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_neq_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1b,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x3a,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_neq_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x1b,0x00,0xff]
@@ -3067,60 +3435,106 @@
 # W64: v_cmp_neq_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x3b,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_nge_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_nge_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_nge_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_nge_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_nge_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_nge_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_nge_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_nge_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_nge_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_nge_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_nge_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_nge_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_nge_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_nge_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0x12,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_nge_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xfd,0x30
-# W32: v_cmp_nge_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xfd,0x30]
-# W64: v_cmp_nge_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_nge_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_nge_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xfd,0x30]
+
+0xfa,0xfe,0x12,0x7c,0x7f,0x5f,0x01,0x01
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_nge_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x5f,0x01,0x01]
+
+0xfa,0x04,0x13,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x13,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x13,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x13,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x13,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0x13,0x7c,0xff,0x6f,0xfd,0x30
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x13,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_nge_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x13,0x7c,0xff,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x13,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_nge_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x13,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x32,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_nge_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x32,0x7c,0x01,0x1b,0x00,0xff]
@@ -3179,60 +3593,106 @@
 # W64: v_cmp_nge_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x33,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_ngt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_ngt_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_ngt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_ngt_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_ngt_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_ngt_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_ngt_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_ngt_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_ngt_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_ngt_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_ngt_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_ngt_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_ngt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_ngt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0x16,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_ngt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xfd,0x30
-# W32: v_cmp_ngt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xfd,0x30]
-# W64: v_cmp_ngt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_ngt_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xfd,0x30]
+
+0xfa,0xfe,0x16,0x7c,0x7f,0x5f,0x01,0x01
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x5f,0x01,0x01]
+
+0xfa,0x04,0x17,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x17,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x17,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x17,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x17,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0x17,0x7c,0xff,0x6f,0xfd,0x30
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x17,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_ngt_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x17,0x7c,0xff,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x17,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x17,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x36,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_ngt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x36,0x7c,0x01,0x1b,0x00,0xff]
@@ -3291,60 +3751,106 @@
 # W64: v_cmp_ngt_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x37,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_nle_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_nle_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_nle_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_nle_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_nle_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_nle_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_nle_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_nle_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_nle_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_nle_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_nle_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_nle_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_nle_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_nle_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0x18,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_nle_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xfd,0x30
-# W32: v_cmp_nle_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xfd,0x30]
-# W64: v_cmp_nle_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_nle_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_nle_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xfd,0x30]
+
+0xfa,0xfe,0x18,0x7c,0x7f,0x5f,0x01,0x01
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_nle_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x5f,0x01,0x01]
+
+0xfa,0x04,0x19,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x19,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x19,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x19,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x19,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0x19,0x7c,0xff,0x6f,0xfd,0x30
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x19,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_nle_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x19,0x7c,0xff,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x19,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_nle_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x19,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x38,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_nle_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x38,0x7c,0x01,0x1b,0x00,0xff]
@@ -3403,60 +3909,106 @@
 # W64: v_cmp_nle_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x39,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_nlg_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_nlg_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_nlg_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_nlg_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_nlg_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_nlg_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_nlg_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_nlg_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_nlg_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_nlg_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_nlg_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_nlg_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_nlg_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_nlg_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0x14,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_nlg_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xfd,0x30
-# W32: v_cmp_nlg_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xfd,0x30]
-# W64: v_cmp_nlg_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_nlg_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xfd,0x30]
+
+0xfa,0xfe,0x14,0x7c,0x7f,0x5f,0x01,0x01
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x5f,0x01,0x01]
+
+0xfa,0x04,0x15,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x15,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x15,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x15,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x15,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0x15,0x7c,0xff,0x6f,0xfd,0x30
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x15,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_nlg_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x15,0x7c,0xff,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x15,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x15,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x34,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_nlg_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x34,0x7c,0x01,0x1b,0x00,0xff]
@@ -3515,60 +4067,106 @@
 # W64: v_cmp_nlg_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x35,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_nlt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_nlt_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_nlt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_nlt_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_nlt_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_nlt_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_nlt_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_nlt_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_nlt_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_nlt_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_nlt_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_nlt_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_nlt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_nlt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0x1c,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_nlt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xfd,0x30
-# W32: v_cmp_nlt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xfd,0x30]
-# W64: v_cmp_nlt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_nlt_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xfd,0x30]
+
+0xfa,0xfe,0x1c,0x7c,0x7f,0x5f,0x01,0x01
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x5f,0x01,0x01]
+
+0xfa,0x04,0x1d,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1d,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1d,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1d,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1d,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0x1d,0x7c,0xff,0x6f,0xfd,0x30
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1d,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_nlt_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1d,0x7c,0xff,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1d,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1d,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x3c,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_nlt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x1b,0x00,0xff]
@@ -3627,60 +4225,106 @@
 # W64: v_cmp_nlt_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x3d,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_o_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_o_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_o_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_o_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_o_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_o_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_o_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_o_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_o_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_o_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_o_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_o_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_o_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_o_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_o_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_o_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_o_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_o_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_o_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_o_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_o_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_o_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_o_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_o_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_o_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_o_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_o_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0x0e,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_o_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_o_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_o_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xfd,0x30
-# W32: v_cmp_o_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xfd,0x30]
-# W64: v_cmp_o_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_o_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_o_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xfd,0x30]
+
+0xfa,0xfe,0x0e,0x7c,0x7f,0x5f,0x01,0x01
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_o_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_o_f16 vcc, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x5f,0x01,0x01]
+
+0xfa,0x04,0x0f,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0f,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0f,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0f,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_o_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0f,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0x0f,0x7c,0xff,0x6f,0xfd,0x30
+# W32-REAL16: v_cmp_o_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0f,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_o_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0f,0x7c,0xff,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0f,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_o_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0f,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x2e,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_o_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x1b,0x00,0xff]
@@ -3739,60 +4383,106 @@
 # W64: v_cmp_o_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x2f,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x1e,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_t_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_t_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_t_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_t_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_t_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_t_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0x1e,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_t_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_t_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_t_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_t_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_t_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_t_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0x1e,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_t_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_t_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_t_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_t_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_t_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0x1e,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_t_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_t_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_t_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_t_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_t_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0x1e,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_t_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_t_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_t_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_t_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_t_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0x1e,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_t_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_t_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_t_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_t_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_t_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0x1e,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_t_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_t_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_t_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_t_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_t_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0x1e,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_t_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_t_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_t_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_t_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_t_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0x1e,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_t_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_t_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_t_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_t_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_t_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0x1e,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_t_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_t_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_t_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_t_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_t_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0x1e,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_t_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_t_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_t_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_t_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_t_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0x1e,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_t_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_t_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_t_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_t_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_t_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0x1e,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_t_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_t_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_t_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_t_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_t_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_t_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0x1e,0x7c,0x7f,0x6f,0xfd,0x30
-# W32: v_cmp_t_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1e,0x7c,0x7f,0x6f,0xfd,0x30]
-# W64: v_cmp_t_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1e,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-REAL16: v_cmp_t_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1e,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_t_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1e,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_t_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1e,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_t_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1e,0x7c,0x7f,0x6f,0xfd,0x30]
+
+0xfa,0xfe,0x1e,0x7c,0x7f,0x5f,0x01,0x01
+# W32-REAL16: v_cmp_t_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x1e,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_t_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x1e,0x7c,0x7f,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_t_f16 vcc_lo, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x1e,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_t_f16 vcc, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x1e,0x7c,0x7f,0x5f,0x01,0x01]
+
+0xfa,0x04,0x1f,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_t_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1f,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_t_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1f,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_t_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1f,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_t_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1f,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0x1f,0x7c,0xff,0x6f,0xfd,0x30
+# W32-REAL16: v_cmp_t_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1f,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_t_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1f,0x7c,0xff,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_t_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1f,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_t_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1f,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x3e,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_t_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x1b,0x00,0xff]
@@ -3963,60 +4653,106 @@
 # W64: v_cmp_t_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x9f,0x7c,0xff,0x6f,0x0d,0x30]
 
 0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_u_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_u_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_u_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_u_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_u_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_u_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_u_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_u_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_u_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_u_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_u_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_u_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_u_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_u_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_u_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_u_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_u_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_u_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_u_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_u_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_u_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_u_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_u_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_u_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_u_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_u_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_u_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0x10,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_u_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_u_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_u_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xfd,0x30
-# W32: v_cmp_u_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xfd,0x30]
-# W64: v_cmp_u_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_u_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_u_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xfd,0x30]
+
+0xfa,0xfe,0x10,0x7c,0x7f,0x5f,0x01,0x01
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_u_f16 vcc, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_u_f16 vcc, v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x5f,0x01,0x01]
+
+0xfa,0x04,0x11,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x11,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x11,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x11,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_u_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x11,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0x11,0x7c,0xff,0x6f,0xfd,0x30
+# W32-REAL16: v_cmp_u_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x11,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_u_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x11,0x7c,0xff,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x11,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_u_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x11,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x30,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_u_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x30,0x7c,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc_dpp8.txt
index 4830bec554afc..7d9f064218fc8 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc_dpp8.txt
@@ -5,12 +5,34 @@
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=W64,W64-FAKE16
 
 0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_class_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_class_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_class_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_class_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_class_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_class_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_class_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0xfe,0xfa,0x7c,0x7f,0x77,0x39,0x05
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0xfa,0x7c,0x7f,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_class_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0xfa,0x7c,0x7f,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0xfa,0x7c,0x7f,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_class_f16 vcc, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0xfa,0x7c,0x7f,0x77,0x39,0x05]
+
+0xe9,0x04,0xfb,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfb,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfb,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfb,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_class_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfb,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0xfb,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfb,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_class_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfb,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfb,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_class_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfb,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0xfc,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_class_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfc,0x7c,0x01,0x77,0x39,0x05]
@@ -21,12 +43,34 @@
 # W64: v_cmp_class_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfd,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_eq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_eq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_eq_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_eq_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_eq_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0xfe,0x04,0x7c,0x7f,0x77,0x39,0x05
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x04,0x7c,0x7f,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_eq_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x04,0x7c,0x7f,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x04,0x7c,0x7f,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x04,0x7c,0x7f,0x77,0x39,0x05]
+
+0xe9,0x04,0x05,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x05,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x05,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x05,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x05,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0x05,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x05,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_eq_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x05,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x05,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x05,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x24,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_eq_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x24,0x7c,0x01,0x77,0x39,0x05]
@@ -113,12 +157,34 @@
 # W64: v_cmp_eq_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x95,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x00,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_f_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x00,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_f_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x00,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_f_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x00,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_f_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x00,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_f_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x00,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_f_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x00,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0x00,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_f_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x00,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_f_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x00,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_f_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x00,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_f_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x00,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_f_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x00,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_f_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x00,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0xfe,0x00,0x7c,0x7f,0x77,0x39,0x05
+# W32-REAL16: v_cmp_f_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x00,0x7c,0x7f,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_f_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x00,0x7c,0x7f,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_f_f16 vcc_lo, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x00,0x7c,0x7f,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_f_f16 vcc, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x00,0x7c,0x7f,0x77,0x39,0x05]
+
+0xe9,0x04,0x01,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_f_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x01,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_f_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x01,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_f_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x01,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_f_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x01,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0x01,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_f_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x01,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_f_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x01,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_f_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x01,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_f_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x01,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x20,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_f_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x20,0x7c,0x01,0x77,0x39,0x05]
@@ -145,12 +211,34 @@
 # W64: v_cmp_f_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x91,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_ge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_ge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_ge_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_ge_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_ge_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0xfe,0x0c,0x7c,0x7f,0x77,0x39,0x05
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0c,0x7c,0x7f,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ge_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0c,0x7c,0x7f,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0c,0x7c,0x7f,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0c,0x7c,0x7f,0x77,0x39,0x05]
+
+0xe9,0x04,0x0d,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0d,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0d,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0d,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0d,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0x0d,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0d,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_ge_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0d,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0d,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0d,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x2c,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_ge_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x2c,0x7c,0x01,0x77,0x39,0x05]
@@ -237,12 +325,34 @@
 # W64: v_cmp_ge_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x9d,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_gt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_gt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_gt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_gt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_gt_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0xfe,0x08,0x7c,0x7f,0x77,0x39,0x05
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x08,0x7c,0x7f,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_gt_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x08,0x7c,0x7f,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x08,0x7c,0x7f,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x08,0x7c,0x7f,0x77,0x39,0x05]
+
+0xe9,0x04,0x09,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x09,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x09,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x09,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x09,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0x09,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x09,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_gt_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x09,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x09,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x09,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x28,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_gt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x28,0x7c,0x01,0x77,0x39,0x05]
@@ -329,12 +439,34 @@
 # W64: v_cmp_gt_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x99,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_le_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_le_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_le_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_le_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_le_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_le_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_le_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0xfe,0x06,0x7c,0x7f,0x77,0x39,0x05
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x06,0x7c,0x7f,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_le_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x06,0x7c,0x7f,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x06,0x7c,0x7f,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_le_f16 vcc, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x06,0x7c,0x7f,0x77,0x39,0x05]
+
+0xe9,0x04,0x07,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x07,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x07,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x07,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_le_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x07,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0x07,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x07,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_le_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x07,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x07,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_le_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x07,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x26,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_le_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x26,0x7c,0x01,0x77,0x39,0x05]
@@ -421,12 +553,34 @@
 # W64: v_cmp_le_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x97,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_lg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_lg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_lg_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_lg_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_lg_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0xfe,0x0a,0x7c,0x7f,0x77,0x39,0x05
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0a,0x7c,0x7f,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_lg_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0a,0x7c,0x7f,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0a,0x7c,0x7f,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0a,0x7c,0x7f,0x77,0x39,0x05]
+
+0xe9,0x04,0x0b,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0b,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0b,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0b,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0b,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0x0b,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0b,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_lg_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0b,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0b,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0b,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x2a,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_lg_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x2a,0x7c,0x01,0x77,0x39,0x05]
@@ -627,12 +781,34 @@
 # W64: v_cmp_ne_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x9b,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_neq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_neq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_neq_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_neq_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_neq_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0xfe,0x1a,0x7c,0x7f,0x77,0x39,0x05
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x1a,0x7c,0x7f,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_neq_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x1a,0x7c,0x7f,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x1a,0x7c,0x7f,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x1a,0x7c,0x7f,0x77,0x39,0x05]
+
+0xe9,0x04,0x1b,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1b,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1b,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1b,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1b,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0x1b,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1b,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_neq_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1b,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1b,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1b,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x3a,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_neq_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x3a,0x7c,0x01,0x77,0x39,0x05]
@@ -643,12 +819,34 @@
 # W64: v_cmp_neq_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x3b,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_nge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_nge_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_nge_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nge_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0xfe,0x12,0x7c,0x7f,0x77,0x39,0x05
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x12,0x7c,0x7f,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nge_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x12,0x7c,0x7f,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x12,0x7c,0x7f,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x12,0x7c,0x7f,0x77,0x39,0x05]
+
+0xe9,0x04,0x13,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x13,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x13,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x13,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x13,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0x13,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x13,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nge_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x13,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x13,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x13,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x32,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_nge_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x32,0x7c,0x01,0x77,0x39,0x05]
@@ -659,12 +857,34 @@
 # W64: v_cmp_nge_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x33,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_ngt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_ngt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_ngt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_ngt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0xfe,0x16,0x7c,0x7f,0x77,0x39,0x05
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x16,0x7c,0x7f,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x16,0x7c,0x7f,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x16,0x7c,0x7f,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x16,0x7c,0x7f,0x77,0x39,0x05]
+
+0xe9,0x04,0x17,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x17,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x17,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x17,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x17,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0x17,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x17,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x17,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x17,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x17,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x36,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_ngt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x36,0x7c,0x01,0x77,0x39,0x05]
@@ -675,12 +895,34 @@
 # W64: v_cmp_ngt_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x37,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_nle_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nle_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_nle_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_nle_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nle_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0xfe,0x18,0x7c,0x7f,0x77,0x39,0x05
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x18,0x7c,0x7f,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nle_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x18,0x7c,0x7f,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x18,0x7c,0x7f,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x18,0x7c,0x7f,0x77,0x39,0x05]
+
+0xe9,0x04,0x19,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x19,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x19,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x19,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x19,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0x19,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x19,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nle_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x19,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x19,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x19,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x38,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_nle_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x38,0x7c,0x01,0x77,0x39,0x05]
@@ -691,12 +933,34 @@
 # W64: v_cmp_nle_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x39,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_nlg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nlg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_nlg_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_nlg_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0xfe,0x14,0x7c,0x7f,0x77,0x39,0x05
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x14,0x7c,0x7f,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x14,0x7c,0x7f,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x14,0x7c,0x7f,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x14,0x7c,0x7f,0x77,0x39,0x05]
+
+0xe9,0x04,0x15,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x15,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x15,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x15,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x15,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0x15,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x15,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x15,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x15,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x15,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x34,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_nlg_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x34,0x7c,0x01,0x77,0x39,0x05]
@@ -707,12 +971,34 @@
 # W64: v_cmp_nlg_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x35,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_nlt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nlt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_nlt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_nlt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0xfe,0x1c,0x7c,0x7f,0x77,0x39,0x05
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x1c,0x7c,0x7f,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x1c,0x7c,0x7f,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x1c,0x7c,0x7f,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x1c,0x7c,0x7f,0x77,0x39,0x05]
+
+0xe9,0x04,0x1d,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1d,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1d,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1d,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1d,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0x1d,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1d,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1d,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1d,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1d,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x3c,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_nlt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x3c,0x7c,0x01,0x77,0x39,0x05]
@@ -723,12 +1009,34 @@
 # W64: v_cmp_nlt_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x3d,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_o_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_o_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_o_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_o_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_o_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_o_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_o_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0xfe,0x0e,0x7c,0x7f,0x77,0x39,0x05
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0e,0x7c,0x7f,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_o_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0e,0x7c,0x7f,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0e,0x7c,0x7f,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_o_f16 vcc, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0e,0x7c,0x7f,0x77,0x39,0x05]
+
+0xe9,0x04,0x0f,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0f,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0f,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0f,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_o_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0f,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0x0f,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0f,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_o_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0f,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0f,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_o_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0f,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x2e,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_o_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x2e,0x7c,0x01,0x77,0x39,0x05]
@@ -739,12 +1047,34 @@
 # W64: v_cmp_o_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x2f,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_t_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_t_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_t_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_t_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_t_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_t_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0x1e,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_t_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1e,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_t_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1e,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_t_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1e,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_t_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1e,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_t_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1e,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_t_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1e,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0xfe,0x1e,0x7c,0x7f,0x77,0x39,0x05
+# W32-REAL16: v_cmp_t_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x1e,0x7c,0x7f,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_t_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x1e,0x7c,0x7f,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_t_f16 vcc_lo, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x1e,0x7c,0x7f,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_t_f16 vcc, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x1e,0x7c,0x7f,0x77,0x39,0x05]
+
+0xe9,0x04,0x1f,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_t_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1f,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_t_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1f,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_t_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1f,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_t_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1f,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0x1f,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_t_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1f,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_t_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1f,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_t_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1f,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_t_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1f,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x3e,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_t_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x3e,0x7c,0x01,0x77,0x39,0x05]
@@ -771,12 +1101,34 @@
 # W64: v_cmp_t_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x9f,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_u_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_u_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_u_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_u_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_u_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_u_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_u_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0xfe,0x10,0x7c,0x7f,0x77,0x39,0x05
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x10,0x7c,0x7f,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_u_f16 vcc, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x10,0x7c,0x7f,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x10,0x7c,0x7f,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_u_f16 vcc, v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x10,0x7c,0x7f,0x77,0x39,0x05]
+
+0xe9,0x04,0x11,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x11,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x11,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x11,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_u_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x11,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0x11,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x11,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_u_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x11,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x11,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_u_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x11,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x30,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_u_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x30,0x7c,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3c.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3c.txt
index 6899016177365..a81e5620f86e4 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3c.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3c.txt
@@ -5,20 +5,28 @@
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,W64-FAKE16 %s
 
 0x0a,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_class_f16_e64 s10, v1, v2         ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_class_f16_e64 s[10:11], v1, v2    ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_class_f16_e64 s10, v1.l, v2.l     ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_class_f16_e64 s10, v1, v2         ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_class_f16_e64 s[10:11], v1.l, v2.l ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_class_f16_e64 s[10:11], v1, v2    ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00
-# W32: v_cmp_class_f16_e64 s10, v255, v2       ; encoding: [0x0a,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00]
-# W64: v_cmp_class_f16_e64 s[10:11], v255, v2  ; encoding: [0x0a,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_class_f16_e64 s10, v255.l, v2.l   ; encoding: [0x0a,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_class_f16_e64 s10, v255, v2       ; encoding: [0x0a,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_class_f16_e64 s[10:11], v255.l, v2.l ; encoding: [0x0a,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_class_f16_e64 s[10:11], v255, v2  ; encoding: [0x0a,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00]
 
 0x0a,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00
-# W32: v_cmp_class_f16_e64 s10, s1, v2         ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00]
-# W64: v_cmp_class_f16_e64 s[10:11], s1, v2    ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00]
+# W32-REAL16: v_cmp_class_f16_e64 s10, s1, v2.l       ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00]
+# W32-FAKE16: v_cmp_class_f16_e64 s10, s1, v2         ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00]
+# W64-REAL16: v_cmp_class_f16_e64 s[10:11], s1, v2.l  ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00]
+# W64-FAKE16: v_cmp_class_f16_e64 s[10:11], s1, v2    ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00]
 
 0x0a,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00
-# W32: v_cmp_class_f16_e64 s10, s105, v255     ; encoding: [0x0a,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00]
-# W64: v_cmp_class_f16_e64 s[10:11], s105, v255 ; encoding: [0x0a,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+# W32-REAL16: v_cmp_class_f16_e64 s10, s105, v255.l   ; encoding: [0x0a,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+# W32-FAKE16: v_cmp_class_f16_e64 s10, s105, v255     ; encoding: [0x0a,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+# W64-REAL16: v_cmp_class_f16_e64 s[10:11], s105, v255.l ; encoding: [0x0a,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+# W64-FAKE16: v_cmp_class_f16_e64 s[10:11], s105, v255 ; encoding: [0x0a,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00]
 
 0x0a,0x00,0x7d,0xd4,0x6a,0x04,0x00,0x00
 # W32: v_cmp_class_f16_e64 s10, vcc_lo, s2     ; encoding: [0x0a,0x00,0x7d,0xd4,0x6a,0x04,0x00,0x00]
@@ -63,6 +71,19 @@
 0x7c,0x01,0x7d,0xd4,0xff,0xd6,0x00,0x20,0x0b,0xfe,0x00,0x00
 # GFX12: v_cmp_class_f16_e64 null, -|0xfe0b|, vcc_hi ; encoding: [0x7c,0x01,0x7d,0xd4,0xff,0xd6,0x00,0x20,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x7d,0xd4,0xff,0x05,0x02,0x00
+# W32-REAL16: v_cmp_class_f16_e64 s10, v255.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x7d,0xd4,0xff,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_class_f16_e64 s10, v255, v2       ; encoding: [0x0a,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_class_f16_e64 s[10:11], v255.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x7d,0xd4,0xff,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_class_f16_e64 s[10:11], v255, v2  ; encoding: [0x0a,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00]
+
+0x0a,0x10,0x7d,0xd4,0x69,0xfe,0x03,0x00
+# W32-REAL16: v_cmp_class_f16_e64 s10, s105, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+# W32-FAKE16: v_cmp_class_f16_e64 s10, s105, v255     ; encoding: [0x0a,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+# W64-REAL16: v_cmp_class_f16_e64 s[10:11], s105, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+# W64-FAKE16: v_cmp_class_f16_e64 s[10:11], s105, v255 ; encoding: [0x0a,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+
+
 0x0a,0x00,0x7e,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_class_f32_e64 s10, v1, v2         ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_class_f32_e64 s[10:11], v1, v2    ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0x05,0x02,0x00]
@@ -182,12 +203,16 @@
 # GFX12: v_cmp_class_f64_e64 null, 0xaf123456, 0xaf123456 ; encoding: [0x7c,0x00,0x7f,0xd4,0xff,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
 
 0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_eq_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_eq_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_eq_f16_e64 s10, v1.l, v2.l        ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_eq_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_eq_f16_e64 s[10:11], v1.l, v2.l   ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_eq_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x02,0xd4,0xff,0xff,0x03,0x00
-# W32: v_cmp_eq_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x02,0xd4,0xff,0xff,0x03,0x00]
-# W64: v_cmp_eq_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x02,0xd4,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cmp_eq_f16_e64 s10, v255.l, v255.l    ; encoding: [0x0a,0x00,0x02,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_eq_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x02,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_eq_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x02,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_eq_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x02,0xd4,0xff,0xff,0x03,0x00]
 
 0x0a,0x00,0x02,0xd4,0x01,0x04,0x00,0x00
 # W32: v_cmp_eq_f16_e64 s10, s1, s2            ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x04,0x00,0x00]
@@ -240,6 +265,19 @@
 0x7c,0x83,0x02,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
 # GFX12: v_cmp_eq_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x02,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x02,0xd4,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cmp_eq_f16_e64 s10, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x02,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_eq_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_eq_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x02,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_eq_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00]
+
+0x0a,0x10,0x02,0xd4,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cmp_eq_f16_e64 s10, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x02,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_eq_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x02,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_eq_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x02,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_eq_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x02,0xd4,0xff,0xff,0x03,0x00]
+
+
 0x0a,0x00,0x12,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_eq_f32_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_eq_f32_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0x05,0x02,0x00]
@@ -711,12 +749,16 @@
 # GFX12: v_cmp_eq_u64_e64 null, 0xaf123456, vcc  ; encoding: [0x7c,0x00,0x5a,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
 
 0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_ge_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_ge_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_ge_f16_e64 s10, v1.l, v2.l        ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_ge_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_ge_f16_e64 s[10:11], v1.l, v2.l   ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_ge_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x06,0xd4,0xff,0xff,0x03,0x00
-# W32: v_cmp_ge_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x06,0xd4,0xff,0xff,0x03,0x00]
-# W64: v_cmp_ge_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x06,0xd4,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cmp_ge_f16_e64 s10, v255.l, v255.l    ; encoding: [0x0a,0x00,0x06,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_ge_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x06,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_ge_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x06,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_ge_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x06,0xd4,0xff,0xff,0x03,0x00]
 
 0x0a,0x00,0x06,0xd4,0x01,0x04,0x00,0x00
 # W32: v_cmp_ge_f16_e64 s10, s1, s2            ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x04,0x00,0x00]
@@ -769,6 +811,19 @@
 0x7c,0x83,0x06,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
 # GFX12: v_cmp_ge_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x06,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x06,0xd4,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cmp_ge_f16_e64 s10, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x06,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_ge_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_ge_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x06,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_ge_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]
+
+0x0a,0x10,0x06,0xd4,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cmp_ge_f16_e64 s10, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x06,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_ge_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x06,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_ge_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x06,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_ge_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x06,0xd4,0xff,0xff,0x03,0x00]
+
+
 0x0a,0x00,0x16,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_ge_f32_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_ge_f32_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0x05,0x02,0x00]
@@ -1240,12 +1295,16 @@
 # GFX12: v_cmp_ge_u64_e64 null, 0xaf123456, vcc  ; encoding: [0x7c,0x00,0x5e,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
 
 0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_gt_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_gt_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_gt_f16_e64 s10, v1.l, v2.l        ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_gt_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_gt_f16_e64 s[10:11], v1.l, v2.l   ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_gt_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x04,0xd4,0xff,0xff,0x03,0x00
-# W32: v_cmp_gt_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x04,0xd4,0xff,0xff,0x03,0x00]
-# W64: v_cmp_gt_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x04,0xd4,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cmp_gt_f16_e64 s10, v255.l, v255.l    ; encoding: [0x0a,0x00,0x04,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_gt_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x04,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_gt_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x04,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_gt_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x04,0xd4,0xff,0xff,0x03,0x00]
 
 0x0a,0x00,0x04,0xd4,0x01,0x04,0x00,0x00
 # W32: v_cmp_gt_f16_e64 s10, s1, s2            ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x04,0x00,0x00]
@@ -1298,6 +1357,19 @@
 0x7c,0x83,0x04,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
 # GFX12: v_cmp_gt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x04,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x04,0xd4,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cmp_gt_f16_e64 s10, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x04,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_gt_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_gt_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x04,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_gt_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]
+
+0x0a,0x10,0x04,0xd4,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cmp_gt_f16_e64 s10, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x04,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_gt_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x04,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_gt_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x04,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_gt_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x04,0xd4,0xff,0xff,0x03,0x00]
+
+
 0x0a,0x00,0x14,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_gt_f32_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_gt_f32_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0x05,0x02,0x00]
@@ -1769,12 +1841,16 @@
 # GFX12: v_cmp_gt_u64_e64 null, 0xaf123456, vcc  ; encoding: [0x7c,0x00,0x5c,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
 
 0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_le_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_le_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_le_f16_e64 s10, v1.l, v2.l        ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_le_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_le_f16_e64 s[10:11], v1.l, v2.l   ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_le_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x03,0xd4,0xff,0xff,0x03,0x00
-# W32: v_cmp_le_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x03,0xd4,0xff,0xff,0x03,0x00]
-# W64: v_cmp_le_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x03,0xd4,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cmp_le_f16_e64 s10, v255.l, v255.l    ; encoding: [0x0a,0x00,0x03,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_le_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x03,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_le_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x03,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_le_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x03,0xd4,0xff,0xff,0x03,0x00]
 
 0x0a,0x00,0x03,0xd4,0x01,0x04,0x00,0x00
 # W32: v_cmp_le_f16_e64 s10, s1, s2            ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x04,0x00,0x00]
@@ -1827,6 +1903,19 @@
 0x7c,0x83,0x03,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
 # GFX12: v_cmp_le_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x03,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x03,0xd4,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cmp_le_f16_e64 s10, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x03,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_le_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_le_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x03,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_le_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]
+
+0x0a,0x10,0x03,0xd4,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cmp_le_f16_e64 s10, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x03,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_le_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x03,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_le_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x03,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_le_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x03,0xd4,0xff,0xff,0x03,0x00]
+
+
 0x0a,0x00,0x13,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_le_f32_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_le_f32_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0x05,0x02,0x00]
@@ -2298,12 +2387,16 @@
 # GFX12: v_cmp_le_u64_e64 null, 0xaf123456, vcc  ; encoding: [0x7c,0x00,0x5b,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
 
 0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_lg_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_lg_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_lg_f16_e64 s10, v1.l, v2.l        ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_lg_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_lg_f16_e64 s[10:11], v1.l, v2.l   ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_lg_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x05,0xd4,0xff,0xff,0x03,0x00
-# W32: v_cmp_lg_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x05,0xd4,0xff,0xff,0x03,0x00]
-# W64: v_cmp_lg_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x05,0xd4,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cmp_lg_f16_e64 s10, v255.l, v255.l    ; encoding: [0x0a,0x00,0x05,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_lg_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x05,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_lg_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x05,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_lg_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x05,0xd4,0xff,0xff,0x03,0x00]
 
 0x0a,0x00,0x05,0xd4,0x01,0x04,0x00,0x00
 # W32: v_cmp_lg_f16_e64 s10, s1, s2            ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x04,0x00,0x00]
@@ -2356,6 +2449,19 @@
 0x7c,0x83,0x05,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
 # GFX12: v_cmp_lg_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x05,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x05,0xd4,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cmp_lg_f16_e64 s10, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x05,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_lg_f16_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_lg_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x05,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_lg_f16_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]
+
+0x0a,0x10,0x05,0xd4,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cmp_lg_f16_e64 s10, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x05,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_lg_f16_e64 s10, v255, v255        ; encoding: [0x0a,0x00,0x05,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_lg_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x05,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_lg_f16_e64 s[10:11], v255, v255   ; encoding: [0x0a,0x00,0x05,0xd4,0xff,0xff,0x03,0x00]
+
+
 0x0a,0x00,0x15,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_lg_f32_e64 s10, v1, v2            ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_lg_f32_e64 s[10:11], v1, v2       ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0x05,0x02,0x00]
@@ -3373,12 +3479,16 @@
 # GFX12: v_cmp_ne_u64_e64 null, 0xaf123456, vcc  ; encoding: [0x7c,0x00,0x5d,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
 
 0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_neq_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_neq_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_neq_f16_e64 s10, v1.l, v2.l       ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_neq_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_neq_f16_e64 s[10:11], v1.l, v2.l  ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_neq_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00
-# W32: v_cmp_neq_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00]
-# W64: v_cmp_neq_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cmp_neq_f16_e64 s10, v255.l, v255.l   ; encoding: [0x0a,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_neq_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_neq_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_neq_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00]
 
 0x0a,0x00,0x0d,0xd4,0x01,0x04,0x00,0x00
 # W32: v_cmp_neq_f16_e64 s10, s1, s2           ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x04,0x00,0x00]
@@ -3431,6 +3541,19 @@
 0x7c,0x83,0x0d,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
 # GFX12: v_cmp_neq_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x0d,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x0d,0xd4,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cmp_neq_f16_e64 s10, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0d,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_neq_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_neq_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0d,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_neq_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00]
+
+0x0a,0x10,0x0d,0xd4,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cmp_neq_f16_e64 s10, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0d,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_neq_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_neq_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0d,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_neq_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00]
+
+
 0x0a,0x00,0x1d,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_neq_f32_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_neq_f32_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0x05,0x02,0x00]
@@ -3538,12 +3661,16 @@
 # GFX12: v_cmp_neq_f64_e64 null, 0xaf123456, -|vcc| clamp ; encoding: [0x7c,0x82,0x2d,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
 
 0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_nge_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_nge_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_nge_f16_e64 s10, v1.l, v2.l       ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_nge_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_nge_f16_e64 s[10:11], v1.l, v2.l  ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_nge_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x09,0xd4,0xff,0xff,0x03,0x00
-# W32: v_cmp_nge_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x09,0xd4,0xff,0xff,0x03,0x00]
-# W64: v_cmp_nge_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x09,0xd4,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cmp_nge_f16_e64 s10, v255.l, v255.l   ; encoding: [0x0a,0x00,0x09,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_nge_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x09,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_nge_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x09,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_nge_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x09,0xd4,0xff,0xff,0x03,0x00]
 
 0x0a,0x00,0x09,0xd4,0x01,0x04,0x00,0x00
 # W32: v_cmp_nge_f16_e64 s10, s1, s2           ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x04,0x00,0x00]
@@ -3596,6 +3723,19 @@
 0x7c,0x83,0x09,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
 # GFX12: v_cmp_nge_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x09,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x09,0xd4,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cmp_nge_f16_e64 s10, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x09,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_nge_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_nge_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x09,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_nge_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]
+
+0x0a,0x10,0x09,0xd4,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cmp_nge_f16_e64 s10, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x09,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_nge_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x09,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_nge_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x09,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_nge_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x09,0xd4,0xff,0xff,0x03,0x00]
+
+
 0x0a,0x00,0x19,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_nge_f32_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_nge_f32_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0x05,0x02,0x00]
@@ -3703,12 +3843,16 @@
 # GFX12: v_cmp_nge_f64_e64 null, 0xaf123456, -|vcc| clamp ; encoding: [0x7c,0x82,0x29,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
 
 0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_ngt_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_ngt_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_ngt_f16_e64 s10, v1.l, v2.l       ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_ngt_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_ngt_f16_e64 s[10:11], v1.l, v2.l  ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_ngt_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00
-# W32: v_cmp_ngt_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00]
-# W64: v_cmp_ngt_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cmp_ngt_f16_e64 s10, v255.l, v255.l   ; encoding: [0x0a,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_ngt_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_ngt_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_ngt_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00]
 
 0x0a,0x00,0x0b,0xd4,0x01,0x04,0x00,0x00
 # W32: v_cmp_ngt_f16_e64 s10, s1, s2           ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x04,0x00,0x00]
@@ -3761,6 +3905,19 @@
 0x7c,0x83,0x0b,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
 # GFX12: v_cmp_ngt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x0b,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x0b,0xd4,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cmp_ngt_f16_e64 s10, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0b,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_ngt_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_ngt_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0b,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_ngt_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00]
+
+0x0a,0x10,0x0b,0xd4,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cmp_ngt_f16_e64 s10, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0b,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_ngt_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_ngt_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0b,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_ngt_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00]
+
+
 0x0a,0x00,0x1b,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_ngt_f32_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_ngt_f32_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0x05,0x02,0x00]
@@ -3868,12 +4025,16 @@
 # GFX12: v_cmp_ngt_f64_e64 null, 0xaf123456, -|vcc| clamp ; encoding: [0x7c,0x82,0x2b,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
 
 0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_nle_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_nle_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_nle_f16_e64 s10, v1.l, v2.l       ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_nle_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_nle_f16_e64 s[10:11], v1.l, v2.l  ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_nle_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00
-# W32: v_cmp_nle_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00]
-# W64: v_cmp_nle_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cmp_nle_f16_e64 s10, v255.l, v255.l   ; encoding: [0x0a,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_nle_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_nle_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_nle_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00]
 
 0x0a,0x00,0x0c,0xd4,0x01,0x04,0x00,0x00
 # W32: v_cmp_nle_f16_e64 s10, s1, s2           ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x04,0x00,0x00]
@@ -3926,6 +4087,19 @@
 0x7c,0x83,0x0c,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
 # GFX12: v_cmp_nle_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x0c,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x0c,0xd4,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cmp_nle_f16_e64 s10, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0c,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_nle_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_nle_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0c,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_nle_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00]
+
+0x0a,0x10,0x0c,0xd4,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cmp_nle_f16_e64 s10, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0c,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_nle_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_nle_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0c,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_nle_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00]
+
+
 0x0a,0x00,0x1c,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_nle_f32_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_nle_f32_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0x05,0x02,0x00]
@@ -4033,12 +4207,16 @@
 # GFX12: v_cmp_nle_f64_e64 null, 0xaf123456, -|vcc| clamp ; encoding: [0x7c,0x82,0x2c,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
 
 0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_nlg_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_nlg_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_nlg_f16_e64 s10, v1.l, v2.l       ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_nlg_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_nlg_f16_e64 s[10:11], v1.l, v2.l  ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_nlg_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00
-# W32: v_cmp_nlg_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00]
-# W64: v_cmp_nlg_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cmp_nlg_f16_e64 s10, v255.l, v255.l   ; encoding: [0x0a,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_nlg_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_nlg_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_nlg_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00]
 
 0x0a,0x00,0x0a,0xd4,0x01,0x04,0x00,0x00
 # W32: v_cmp_nlg_f16_e64 s10, s1, s2           ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x04,0x00,0x00]
@@ -4091,6 +4269,19 @@
 0x7c,0x83,0x0a,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
 # GFX12: v_cmp_nlg_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x0a,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x0a,0xd4,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cmp_nlg_f16_e64 s10, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0a,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_nlg_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_nlg_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0a,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_nlg_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]
+
+0x0a,0x10,0x0a,0xd4,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cmp_nlg_f16_e64 s10, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0a,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_nlg_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_nlg_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0a,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_nlg_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00]
+
+
 0x0a,0x00,0x1a,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_nlg_f32_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_nlg_f32_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0x05,0x02,0x00]
@@ -4198,12 +4389,16 @@
 # GFX12: v_cmp_nlg_f64_e64 null, 0xaf123456, -|vcc| clamp ; encoding: [0x7c,0x82,0x2a,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
 
 0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_nlt_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_nlt_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_nlt_f16_e64 s10, v1.l, v2.l       ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_nlt_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_nlt_f16_e64 s[10:11], v1.l, v2.l  ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_nlt_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00
-# W32: v_cmp_nlt_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00]
-# W64: v_cmp_nlt_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cmp_nlt_f16_e64 s10, v255.l, v255.l   ; encoding: [0x0a,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_nlt_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_nlt_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_nlt_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00]
 
 0x0a,0x00,0x0e,0xd4,0x01,0x04,0x00,0x00
 # W32: v_cmp_nlt_f16_e64 s10, s1, s2           ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x04,0x00,0x00]
@@ -4256,6 +4451,19 @@
 0x7c,0x83,0x0e,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
 # GFX12: v_cmp_nlt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x0e,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x0e,0xd4,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cmp_nlt_f16_e64 s10, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0e,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_nlt_f16_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_nlt_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x0e,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_nlt_f16_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00]
+
+0x0a,0x10,0x0e,0xd4,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cmp_nlt_f16_e64 s10, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0e,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_nlt_f16_e64 s10, v255, v255       ; encoding: [0x0a,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_nlt_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x0e,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_nlt_f16_e64 s[10:11], v255, v255  ; encoding: [0x0a,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00]
+
+
 0x0a,0x00,0x1e,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_nlt_f32_e64 s10, v1, v2           ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_nlt_f32_e64 s[10:11], v1, v2      ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0x05,0x02,0x00]
@@ -4363,12 +4571,16 @@
 # GFX12: v_cmp_nlt_f64_e64 null, 0xaf123456, -|vcc| clamp ; encoding: [0x7c,0x82,0x2e,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
 
 0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_o_f16_e64 s10, v1, v2             ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_o_f16_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_o_f16_e64 s10, v1.l, v2.l         ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_o_f16_e64 s10, v1, v2             ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_o_f16_e64 s[10:11], v1.l, v2.l    ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_o_f16_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x07,0xd4,0xff,0xff,0x03,0x00
-# W32: v_cmp_o_f16_e64 s10, v255, v255         ; encoding: [0x0a,0x00,0x07,0xd4,0xff,0xff,0x03,0x00]
-# W64: v_cmp_o_f16_e64 s[10:11], v255, v255    ; encoding: [0x0a,0x00,0x07,0xd4,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cmp_o_f16_e64 s10, v255.l, v255.l     ; encoding: [0x0a,0x00,0x07,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_o_f16_e64 s10, v255, v255         ; encoding: [0x0a,0x00,0x07,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_o_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x07,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_o_f16_e64 s[10:11], v255, v255    ; encoding: [0x0a,0x00,0x07,0xd4,0xff,0xff,0x03,0x00]
 
 0x0a,0x00,0x07,0xd4,0x01,0x04,0x00,0x00
 # W32: v_cmp_o_f16_e64 s10, s1, s2             ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x04,0x00,0x00]
@@ -4421,6 +4633,19 @@
 0x7c,0x83,0x07,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
 # GFX12: v_cmp_o_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x07,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x07,0xd4,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cmp_o_f16_e64 s10, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x07,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_o_f16_e64 s10, v1, v2             ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_o_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x07,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_o_f16_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]
+
+0x0a,0x10,0x07,0xd4,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cmp_o_f16_e64 s10, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x07,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_o_f16_e64 s10, v255, v255         ; encoding: [0x0a,0x00,0x07,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_o_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x07,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_o_f16_e64 s[10:11], v255, v255    ; encoding: [0x0a,0x00,0x07,0xd4,0xff,0xff,0x03,0x00]
+
+
 0x0a,0x00,0x17,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_o_f32_e64 s10, v1, v2             ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_o_f32_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0x05,0x02,0x00]
@@ -4528,12 +4753,16 @@
 # GFX12: v_cmp_o_f64_e64 null, 0xaf123456, -|vcc| clamp ; encoding: [0x7c,0x82,0x27,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
 
 0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00
-# W32: v_cmp_u_f16_e64 s10, v1, v2             ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]
-# W64: v_cmp_u_f16_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cmp_u_f16_e64 s10, v1.l, v2.l         ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_u_f16_e64 s10, v1, v2             ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_u_f16_e64 s[10:11], v1.l, v2.l    ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_u_f16_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]
 
 0x0a,0x00,0x08,0xd4,0xff,0xff,0x03,0x00
-# W32: v_cmp_u_f16_e64 s10, v255, v255         ; encoding: [0x0a,0x00,0x08,0xd4,0xff,0xff,0x03,0x00]
-# W64: v_cmp_u_f16_e64 s[10:11], v255, v255    ; encoding: [0x0a,0x00,0x08,0xd4,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cmp_u_f16_e64 s10, v255.l, v255.l     ; encoding: [0x0a,0x00,0x08,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_u_f16_e64 s10, v255, v255         ; encoding: [0x0a,0x00,0x08,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_u_f16_e64 s[10:11], v255.l, v255.l ; encoding: [0x0a,0x00,0x08,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_u_f16_e64 s[10:11], v255, v255    ; encoding: [0x0a,0x00,0x08,0xd4,0xff,0xff,0x03,0x00]
 
 0x0a,0x00,0x08,0xd4,0x01,0x04,0x00,0x00
 # W32: v_cmp_u_f16_e64 s10, s1, s2             ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x04,0x00,0x00]
@@ -4586,6 +4815,19 @@
 0x7c,0x83,0x08,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
 # GFX12: v_cmp_u_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7c,0x83,0x08,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
+0x0a,0x08,0x08,0xd4,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cmp_u_f16_e64 s10, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x08,0xd4,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cmp_u_f16_e64 s10, v1, v2             ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cmp_u_f16_e64 s[10:11], v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x08,0xd4,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cmp_u_f16_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]
+
+0x0a,0x10,0x08,0xd4,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cmp_u_f16_e64 s10, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x08,0xd4,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cmp_u_f16_e64 s10, v255, v255         ; encoding: [0x0a,0x00,0x08,0xd4,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cmp_u_f16_e64 s[10:11], v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x08,0xd4,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cmp_u_f16_e64 s[10:11], v255, v255    ; encoding: [0x0a,0x00,0x08,0xd4,0xff,0xff,0x03,0x00]
+
+
 0x0a,0x00,0x18,0xd4,0x01,0x05,0x02,0x00
 # W32: v_cmp_u_f32_e64 s10, v1, v2             ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0x05,0x02,0x00]
 # W64: v_cmp_u_f32_e64 s[10:11], v1, v2        ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0x05,0x02,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3c_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3c_dpp16.txt
index 59637e1e0416a..f8accb94d18fe 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3c_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3c_dpp16.txt
@@ -5,63 +5,106 @@
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,W64-FAKE16 %s
 
 0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_class_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_class_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_class_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_class_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_class_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_class_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_class_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x7d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_class_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16_e64_dpp s10, v1.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16_e64_dpp s[10:11], v1.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_class_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_class_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_class_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_class_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_class_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_class_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_class_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
-# W32: v_cmp_class_f16_e64_dpp vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_class_f16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_class_f16_e64_dpp vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_class_f16_e64_dpp vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
 
 0x7a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
-# W32: v_cmp_class_f16_e64_dpp ttmp14, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
-# W64: v_cmp_class_f16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_class_f16_e64_dpp ttmp14, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp ttmp14, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_class_f16_e64_dpp ttmp[14:15], v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
 
 0x7c,0x01,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x0d,0x30
-# GFX12: v_cmp_class_f16_e64_dpp null, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp null, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp null, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x08,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_class_f16_e64_dpp ttmp14, v1.h, v2.l op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x08,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp ttmp14, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_class_f16_e64_dpp ttmp[14:15], v1.h, v2.l op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x08,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+
+0x7c,0x11,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.h op_sel:[0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x11,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp null, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.h op_sel:[0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x11,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp null, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_class_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -123,63 +166,106 @@
 # GFX12: v_cmp_class_f32_e64_dpp null, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x01,0x7e,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_eq_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_eq_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x02,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_eq_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp s10, v1.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_eq_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_eq_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x01,0x02,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# W32: v_cmp_eq_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x02,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_eq_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x02,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x02,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x02,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x02,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x02,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 
 0x7a,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# W32: v_cmp_eq_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-# W64: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp ttmp14, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
 
 0x7c,0x83,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX12: v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x0a,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_eq_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0x7c,0x93,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_eq_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -563,63 +649,106 @@
 # GFX12: v_cmp_eq_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x00,0x4a,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_ge_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_ge_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x06,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_ge_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp s10, v1.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_ge_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_ge_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x01,0x06,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# W32: v_cmp_ge_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x06,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_ge_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x06,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x06,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x06,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x06,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x06,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 
 0x7a,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# W32: v_cmp_ge_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-# W64: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp ttmp14, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
 
 0x7c,0x83,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX12: v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x0a,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_ge_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0x7c,0x93,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_ge_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -1009,63 +1138,106 @@
 # GFX12: v_cmp_ge_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x00,0x4e,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_gt_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_gt_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x04,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_gt_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp s10, v1.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_gt_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_gt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x01,0x04,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# W32: v_cmp_gt_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x04,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_gt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x04,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x04,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x04,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x04,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x04,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 
 0x7a,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# W32: v_cmp_gt_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-# W64: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp ttmp14, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
 
 0x7c,0x83,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX12: v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x0a,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_gt_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0x7c,0x93,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_gt_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -1449,63 +1621,106 @@
 # GFX12: v_cmp_gt_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x00,0x4c,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_le_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_le_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_le_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_le_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_le_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_le_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_le_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x03,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_le_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_le_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16_e64_dpp s10, v1.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16_e64_dpp s[10:11], v1.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_le_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_le_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_le_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_le_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_le_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_le_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_le_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x01,0x03,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# W32: v_cmp_le_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x03,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_le_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x03,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_le_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x03,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x03,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_le_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x03,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x03,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 
 0x7a,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# W32: v_cmp_le_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-# W64: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_le_f16_e64_dpp ttmp14, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
 
 0x7c,0x83,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX12: v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x0a,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_le_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0x7c,0x93,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_le_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -1889,63 +2104,106 @@
 # GFX12: v_cmp_le_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x00,0x4b,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_lg_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_lg_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x05,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_lg_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp s10, v1.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_lg_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_lg_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x01,0x05,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# W32: v_cmp_lg_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x05,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_lg_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x05,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x05,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x05,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x05,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x05,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 
 0x7a,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# W32: v_cmp_lg_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-# W64: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp ttmp14, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
 
 0x7c,0x83,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX12: v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x0a,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_lg_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0x7c,0x93,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_lg_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -2812,63 +3070,106 @@
 # GFX12: v_cmp_ne_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x00,0x4d,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_neq_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_neq_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x0d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_neq_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp s10, v1.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_neq_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_neq_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x01,0x0d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# W32: v_cmp_neq_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_neq_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 
 0x7a,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# W32: v_cmp_neq_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-# W64: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp ttmp14, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
 
 0x7c,0x83,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX12: v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x0a,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_neq_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0x7c,0x93,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_neq_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -2930,63 +3231,106 @@
 # GFX12: v_cmp_neq_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x1d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_nge_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_nge_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x09,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_nge_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp s10, v1.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_nge_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_nge_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x01,0x09,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# W32: v_cmp_nge_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x09,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_nge_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x09,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x09,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x09,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x09,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x09,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 
 0x7a,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# W32: v_cmp_nge_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-# W64: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp ttmp14, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
 
 0x7c,0x83,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX12: v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x0a,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_nge_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0x7c,0x93,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_nge_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -3048,63 +3392,106 @@
 # GFX12: v_cmp_nge_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x19,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_ngt_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_ngt_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x0b,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_ngt_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp s10, v1.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_ngt_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_ngt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x01,0x0b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# W32: v_cmp_ngt_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_ngt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 
 0x7a,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# W32: v_cmp_ngt_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-# W64: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp ttmp14, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
 
 0x7c,0x83,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX12: v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x0a,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0x7c,0x93,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_ngt_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -3166,63 +3553,106 @@
 # GFX12: v_cmp_ngt_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x1b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_nle_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_nle_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x0c,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_nle_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp s10, v1.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_nle_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_nle_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x01,0x0c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# W32: v_cmp_nle_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_nle_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 
 0x7a,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# W32: v_cmp_nle_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-# W64: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp ttmp14, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
 
 0x7c,0x83,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX12: v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x0a,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_nle_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0x7c,0x93,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_nle_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -3284,63 +3714,106 @@
 # GFX12: v_cmp_nle_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x1c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_nlg_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_nlg_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x0a,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_nlg_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp s10, v1.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_nlg_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_nlg_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x01,0x0a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# W32: v_cmp_nlg_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_nlg_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 
 0x7a,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# W32: v_cmp_nlg_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-# W64: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp ttmp14, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
 
 0x7c,0x83,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX12: v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x0a,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0x7c,0x93,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_nlg_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -3402,63 +3875,106 @@
 # GFX12: v_cmp_nlg_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x1a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_nlt_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_nlt_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x0e,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_nlt_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp s10, v1.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_nlt_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_nlt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x01,0x0e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# W32: v_cmp_nlt_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_nlt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 
 0x7a,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# W32: v_cmp_nlt_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-# W64: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp ttmp14, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
 
 0x7c,0x83,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX12: v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x0a,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0x7c,0x93,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_nlt_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -3520,63 +4036,106 @@
 # GFX12: v_cmp_nlt_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x1e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_o_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_o_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_o_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_o_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_o_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_o_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_o_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x07,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_o_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_o_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16_e64_dpp s10, v1.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16_e64_dpp s[10:11], v1.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_o_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_o_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_o_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_o_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_o_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_o_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_o_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x01,0x07,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# W32: v_cmp_o_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x07,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_o_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x07,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_o_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x07,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x07,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_o_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x07,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x07,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 
 0x7a,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# W32: v_cmp_o_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-# W64: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_o_f16_e64_dpp ttmp14, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
 
 0x7c,0x83,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX12: v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x0a,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_o_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0x7c,0x93,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_o_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -3642,63 +4201,106 @@
 # GFX12: v_cmp_o_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x17,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cmp_u_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_u_f16_e64_dpp s10, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
 0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cmp_u_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_u_f16_e64_dpp s10, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp s10, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
 0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# W32: v_cmp_u_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16_e64_dpp s10, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp s10, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
 0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# W32: v_cmp_u_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16_e64_dpp s10, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp s10, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
 0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_u_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16_e64_dpp s10, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp s10, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x08,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff
-# W32: v_cmp_u_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cmp_u_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16_e64_dpp s10, v1.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16_e64_dpp s[10:11], v1.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff]
 
 0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cmp_u_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16_e64_dpp s10, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
 0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# W32: v_cmp_u_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16_e64_dpp s10, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp s10, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
 0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cmp_u_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16_e64_dpp s10, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp s10, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
 0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# W32: v_cmp_u_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16_e64_dpp s10, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp s10, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
 0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cmp_u_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16_e64_dpp s10, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp s10, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
 0x68,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# W32: v_cmp_u_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-# W64: v_cmp_u_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16_e64_dpp s104, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16_e64_dpp s[104:105], v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
 0x6a,0x01,0x08,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# W32: v_cmp_u_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x08,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_u_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x08,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_u_f16_e64_dpp vcc_lo, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x08,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x08,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_u_f16_e64_dpp vcc, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x08,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x08,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 
 0x7a,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# W32: v_cmp_u_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-# W64: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_u_f16_e64_dpp ttmp14, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
 
 0x7c,0x83,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX12: v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x7a,0x0a,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cmp_u_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp ttmp14, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x0a,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7a,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0x7c,0x93,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x93,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
 0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 # W32: v_cmp_u_f32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3c_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3c_dpp8.txt
index bc380f2f312cb..683a63db76f34 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3c_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3c_dpp8.txt
@@ -5,31 +5,58 @@
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,W64-FAKE16 %s
 
 0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_class_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_class_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_class_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x0a,0x00,0x7d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_class_f16_e64_dpp s10, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_class_f16_e64_dpp s10, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp s10, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_class_f16_e64_dpp s[10:11], v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
 
 0x0a,0x00,0x7d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_class_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_class_f16_e64_dpp s10, v1.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_class_f16_e64_dpp s[10:11], v1.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_class_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_class_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_class_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_class_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_class_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_class_f16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_class_f16_e64_dpp vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_class_f16_e64_dpp vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x7a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_class_f16_e64_dpp ttmp14, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_class_f16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_class_f16_e64_dpp ttmp14, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp ttmp14, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_class_f16_e64_dpp ttmp[14:15], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x7c,0x01,0x7d,0xd4,0xea,0xfe,0x03,0x20,0xff,0x00,0x00,0x00
-# GFX12: v_cmp_class_f16_e64_dpp null, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xea,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xea,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp null, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xea,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xea,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp null, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xea,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+
+0x7a,0x08,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_class_f16_e64_dpp ttmp14, v1.h, v2.l op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x08,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp ttmp14, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_class_f16_e64_dpp ttmp[14:15], v1.h, v2.l op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x08,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+0x7c,0x11,0x7d,0xd4,0xea,0xfe,0x03,0x20,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.h op_sel:[0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x11,0x7d,0xd4,0xea,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_class_f16_e64_dpp null, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xea,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_class_f16_e64_dpp null, -|v255.l|, v255.h op_sel:[0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x11,0x7d,0xd4,0xea,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_class_f16_e64_dpp null, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xea,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_class_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -55,27 +82,52 @@
 # GFX12: v_cmp_class_f32_e64_dpp null, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x01,0x7e,0xd4,0xea,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_eq_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x0a,0x00,0x02,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_eq_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp s10, v1.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp s[10:11], v1.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_eq_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_eq_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x01,0x02,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# W32: v_cmp_eq_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x02,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-# W64: v_cmp_eq_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x02,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x02,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x02,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x02,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x02,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 
 0x7a,0x02,0x02,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# W32: v_cmp_eq_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x02,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-# W64: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x02,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x02,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x02,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x02,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x02,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 
 0x7c,0x83,0x02,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX12: v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x7a,0x0a,0x02,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_eq_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x02,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x02,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x02,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x02,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0x7c,0x93,0x02,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x02,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_eq_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x02,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x12,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_eq_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x12,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -243,27 +295,52 @@
 # GFX12: v_cmp_eq_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x00,0x4a,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_ge_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x0a,0x00,0x06,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_ge_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp s10, v1.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp s[10:11], v1.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_ge_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_ge_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x01,0x06,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# W32: v_cmp_ge_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x06,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-# W64: v_cmp_ge_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x06,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x06,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x06,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x06,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x06,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 
 0x7a,0x02,0x06,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# W32: v_cmp_ge_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x06,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-# W64: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x06,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x06,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x06,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x06,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x06,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 
 0x7c,0x83,0x06,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX12: v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x7a,0x0a,0x06,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_ge_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x06,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x06,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x06,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x06,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0x7c,0x93,0x06,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x06,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_ge_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x06,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x16,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_ge_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x16,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -439,27 +516,52 @@
 # GFX12: v_cmp_ge_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x00,0x4e,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_gt_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x0a,0x00,0x04,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_gt_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp s10, v1.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp s[10:11], v1.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_gt_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_gt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x01,0x04,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# W32: v_cmp_gt_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x04,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-# W64: v_cmp_gt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x04,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x04,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x04,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x04,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x04,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 
 0x7a,0x02,0x04,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# W32: v_cmp_gt_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x04,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-# W64: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x04,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x04,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x04,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x04,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x04,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 
 0x7c,0x83,0x04,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX12: v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x7a,0x0a,0x04,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_gt_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x04,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x04,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x04,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x04,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0x7c,0x93,0x04,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x04,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_gt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x04,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x14,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_gt_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x14,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -627,27 +729,52 @@
 # GFX12: v_cmp_gt_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x00,0x4c,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_le_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_le_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_le_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x0a,0x00,0x03,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_le_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_le_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_le_f16_e64_dpp s10, v1.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_le_f16_e64_dpp s[10:11], v1.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_le_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_le_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_le_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_le_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x01,0x03,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# W32: v_cmp_le_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x03,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-# W64: v_cmp_le_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x03,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_le_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x03,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x03,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_le_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x03,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x03,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 
 0x7a,0x02,0x03,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# W32: v_cmp_le_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x03,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-# W64: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x03,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_le_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x03,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x03,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x03,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x03,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 
 0x7c,0x83,0x03,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX12: v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x7a,0x0a,0x03,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_le_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x03,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x03,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x03,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x03,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0x7c,0x93,0x03,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x03,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_le_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x03,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x13,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_le_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x13,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -815,27 +942,52 @@
 # GFX12: v_cmp_le_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x00,0x4b,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_lg_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x0a,0x00,0x05,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_lg_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp s10, v1.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp s[10:11], v1.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_lg_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_lg_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x01,0x05,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# W32: v_cmp_lg_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x05,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-# W64: v_cmp_lg_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x05,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x05,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x05,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x05,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x05,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 
 0x7a,0x02,0x05,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# W32: v_cmp_lg_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x05,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-# W64: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x05,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x05,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x05,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x05,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x05,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 
 0x7c,0x83,0x05,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX12: v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x7a,0x0a,0x05,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_lg_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x05,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x05,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x05,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x05,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0x7c,0x93,0x05,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x05,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_lg_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x05,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x15,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_lg_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x15,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -1216,27 +1368,52 @@
 # GFX12: v_cmp_ne_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x00,0x4d,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_neq_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x0a,0x00,0x0d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_neq_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp s10, v1.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp s[10:11], v1.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_neq_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_neq_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x01,0x0d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# W32: v_cmp_neq_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-# W64: v_cmp_neq_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 
 0x7a,0x02,0x0d,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# W32: v_cmp_neq_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0d,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-# W64: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0d,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0d,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0d,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0d,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0d,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 
 0x7c,0x83,0x0d,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX12: v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x7a,0x0a,0x0d,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_neq_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x0d,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0d,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x0d,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0d,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0x7c,0x93,0x0d,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x0d,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_neq_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x0d,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x1d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_neq_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -1262,27 +1439,52 @@
 # GFX12: v_cmp_neq_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x1d,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_nge_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x0a,0x00,0x09,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_nge_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp s10, v1.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp s[10:11], v1.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_nge_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nge_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x01,0x09,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# W32: v_cmp_nge_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x09,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nge_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x09,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x09,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x09,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x09,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x09,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 
 0x7a,0x02,0x09,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# W32: v_cmp_nge_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x09,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x09,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x09,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x09,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x09,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x09,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 
 0x7c,0x83,0x09,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX12: v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x7a,0x0a,0x09,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_nge_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x09,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x09,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x09,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x09,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0x7c,0x93,0x09,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x09,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nge_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x09,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x19,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_nge_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x19,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -1308,27 +1510,52 @@
 # GFX12: v_cmp_nge_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x19,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_ngt_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x0a,0x00,0x0b,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_ngt_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp s10, v1.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp s[10:11], v1.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_ngt_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_ngt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x01,0x0b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# W32: v_cmp_ngt_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-# W64: v_cmp_ngt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 
 0x7a,0x02,0x0b,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# W32: v_cmp_ngt_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0b,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-# W64: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0b,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0b,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0b,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0b,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0b,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 
 0x7c,0x83,0x0b,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX12: v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x7a,0x0a,0x0b,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x0b,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0b,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x0b,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0b,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0x7c,0x93,0x0b,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x0b,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_ngt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x0b,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x1b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_ngt_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -1354,27 +1581,52 @@
 # GFX12: v_cmp_ngt_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x1b,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_nle_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x0a,0x00,0x0c,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_nle_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp s10, v1.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp s[10:11], v1.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_nle_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nle_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x01,0x0c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# W32: v_cmp_nle_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nle_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 
 0x7a,0x02,0x0c,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# W32: v_cmp_nle_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0c,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0c,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0c,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0c,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0c,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0c,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 
 0x7c,0x83,0x0c,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX12: v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x7a,0x0a,0x0c,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_nle_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x0c,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0c,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x0c,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0c,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0x7c,0x93,0x0c,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x0c,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nle_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x0c,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x1c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_nle_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -1400,27 +1652,52 @@
 # GFX12: v_cmp_nle_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x1c,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_nlg_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x0a,0x00,0x0a,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_nlg_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp s10, v1.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp s[10:11], v1.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_nlg_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nlg_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x01,0x0a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# W32: v_cmp_nlg_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nlg_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 
 0x7a,0x02,0x0a,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# W32: v_cmp_nlg_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0a,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0a,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0a,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0a,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0a,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0a,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 
 0x7c,0x83,0x0a,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX12: v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x7a,0x0a,0x0a,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x0a,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0a,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x0a,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0a,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0x7c,0x93,0x0a,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x0a,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nlg_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x0a,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x1a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_nlg_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -1450,27 +1727,52 @@
 # GFX12: v_cmp_nlg_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x1a,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_nlt_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x0a,0x00,0x0e,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_nlt_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp s10, v1.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp s[10:11], v1.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_nlt_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nlt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x01,0x0e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# W32: v_cmp_nlt_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nlt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 
 0x7a,0x02,0x0e,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# W32: v_cmp_nlt_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0e,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0e,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0e,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0e,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0e,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0e,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 
 0x7c,0x83,0x0e,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX12: v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x7a,0x0a,0x0e,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x0e,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0e,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x0e,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x0e,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0x7c,0x93,0x0e,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x0e,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nlt_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x0e,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x1e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_nlt_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -1496,27 +1798,52 @@
 # GFX12: v_cmp_nlt_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x1e,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_o_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_o_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_o_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x0a,0x00,0x07,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_o_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_o_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_o_f16_e64_dpp s10, v1.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_o_f16_e64_dpp s[10:11], v1.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_o_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_o_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_o_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_o_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x01,0x07,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# W32: v_cmp_o_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x07,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-# W64: v_cmp_o_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x07,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_o_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x07,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x07,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_o_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x07,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x07,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 
 0x7a,0x02,0x07,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# W32: v_cmp_o_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x07,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-# W64: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x07,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_o_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x07,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x07,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x07,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x07,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 
 0x7c,0x83,0x07,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX12: v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x7a,0x0a,0x07,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_o_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x07,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x07,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x07,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x07,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0x7c,0x93,0x07,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x07,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_o_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x07,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x17,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_o_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x17,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -1542,27 +1869,52 @@
 # GFX12: v_cmp_o_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x17,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_u_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_u_f16_e64_dpp s10, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_u_f16_e64_dpp s[10:11], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x0a,0x00,0x08,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_u_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_u_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_u_f16_e64_dpp s10, v1.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_u_f16_e64_dpp s[10:11], v1.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05]
 
 0x68,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# W32: v_cmp_u_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cmp_u_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_u_f16_e64_dpp s104, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_u_f16_e64_dpp s[104:105], v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
 0x6a,0x01,0x08,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# W32: v_cmp_u_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x08,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-# W64: v_cmp_u_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x08,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_u_f16_e64_dpp vcc_lo, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x08,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x08,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_u_f16_e64_dpp vcc, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x08,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x08,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 
 0x7a,0x02,0x08,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# W32: v_cmp_u_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x08,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-# W64: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x08,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_u_f16_e64_dpp ttmp14, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x08,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x08,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x08,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x08,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 
 0x7c,0x83,0x08,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX12: v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x7a,0x0a,0x08,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cmp_u_f16_e64_dpp ttmp14, -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x08,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x08,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x0a,0x08,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x08,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0x7c,0x93,0x08,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x08,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_u_f16_e64_dpp null, -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x93,0x08,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
 0x0a,0x00,0x18,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
 # W32: v_cmp_u_f32_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x18,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc.txt
index 12c26963c6791..8b50b119a00e9 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc.txt
@@ -5,64 +5,118 @@
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=W64,W64-FAKE16
 
 0x01,0x05,0xfa,0x7c
-# W32: v_cmp_class_f16_e32 vcc_lo, v1, v2      ; encoding: [0x01,0x05,0xfa,0x7c]
-# W64: v_cmp_class_f16_e32 vcc, v1, v2         ; encoding: [0x01,0x05,0xfa,0x7c]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, v1.l, v2.l  ; encoding: [0x01,0x05,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, v1.l, v2.l     ; encoding: [0x01,0x05,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, v1, v2      ; encoding: [0x01,0x05,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, v1, v2         ; encoding: [0x01,0x05,0xfa,0x7c]
 
 0x7f,0x05,0xfa,0x7c
-# W32: v_cmp_class_f16_e32 vcc_lo, v127, v2    ; encoding: [0x7f,0x05,0xfa,0x7c]
-# W64: v_cmp_class_f16_e32 vcc, v127, v2       ; encoding: [0x7f,0x05,0xfa,0x7c]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, v127.l, v2.l ; encoding: [0x7f,0x05,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, v127.l, v2.l   ; encoding: [0x7f,0x05,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, v127, v2    ; encoding: [0x7f,0x05,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, v127, v2       ; encoding: [0x7f,0x05,0xfa,0x7c]
 
 0x01,0x04,0xfa,0x7c
-# W32: v_cmp_class_f16_e32 vcc_lo, s1, v2      ; encoding: [0x01,0x04,0xfa,0x7c]
-# W64: v_cmp_class_f16_e32 vcc, s1, v2         ; encoding: [0x01,0x04,0xfa,0x7c]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, s1, v2.l    ; encoding: [0x01,0x04,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, s1, v2.l       ; encoding: [0x01,0x04,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, s1, v2      ; encoding: [0x01,0x04,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, s1, v2         ; encoding: [0x01,0x04,0xfa,0x7c]
 
 0x69,0x04,0xfa,0x7c
-# W32: v_cmp_class_f16_e32 vcc_lo, s105, v2    ; encoding: [0x69,0x04,0xfa,0x7c]
-# W64: v_cmp_class_f16_e32 vcc, s105, v2       ; encoding: [0x69,0x04,0xfa,0x7c]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, s105, v2.l  ; encoding: [0x69,0x04,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, s105, v2.l     ; encoding: [0x69,0x04,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, s105, v2    ; encoding: [0x69,0x04,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, s105, v2       ; encoding: [0x69,0x04,0xfa,0x7c]
 
 0x6a,0x04,0xfa,0x7c
-# W32: v_cmp_class_f16_e32 vcc_lo, vcc_lo, v2  ; encoding: [0x6a,0x04,0xfa,0x7c]
-# W64: v_cmp_class_f16_e32 vcc, vcc_lo, v2     ; encoding: [0x6a,0x04,0xfa,0x7c]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, vcc_lo, v2.l ; encoding: [0x6a,0x04,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, vcc_lo, v2.l   ; encoding: [0x6a,0x04,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, vcc_lo, v2  ; encoding: [0x6a,0x04,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, vcc_lo, v2     ; encoding: [0x6a,0x04,0xfa,0x7c]
 
 0x6b,0x04,0xfa,0x7c
-# W32: v_cmp_class_f16_e32 vcc_lo, vcc_hi, v2  ; encoding: [0x6b,0x04,0xfa,0x7c]
-# W64: v_cmp_class_f16_e32 vcc, vcc_hi, v2     ; encoding: [0x6b,0x04,0xfa,0x7c]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, vcc_hi, v2.l ; encoding: [0x6b,0x04,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, vcc_hi, v2.l   ; encoding: [0x6b,0x04,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, vcc_hi, v2  ; encoding: [0x6b,0x04,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, vcc_hi, v2     ; encoding: [0x6b,0x04,0xfa,0x7c]
 
 0x7b,0x04,0xfa,0x7c
-# W32: v_cmp_class_f16_e32 vcc_lo, ttmp15, v2  ; encoding: [0x7b,0x04,0xfa,0x7c]
-# W64: v_cmp_class_f16_e32 vcc, ttmp15, v2     ; encoding: [0x7b,0x04,0xfa,0x7c]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, ttmp15, v2.l ; encoding: [0x7b,0x04,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, ttmp15, v2.l   ; encoding: [0x7b,0x04,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, ttmp15, v2  ; encoding: [0x7b,0x04,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, ttmp15, v2     ; encoding: [0x7b,0x04,0xfa,0x7c]
 
 0x7d,0x04,0xfa,0x7c
-# W32: v_cmp_class_f16_e32 vcc_lo, m0, v2      ; encoding: [0x7d,0x04,0xfa,0x7c]
-# W64: v_cmp_class_f16_e32 vcc, m0, v2         ; encoding: [0x7d,0x04,0xfa,0x7c]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, m0, v2.l    ; encoding: [0x7d,0x04,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, m0, v2.l       ; encoding: [0x7d,0x04,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, m0, v2      ; encoding: [0x7d,0x04,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, m0, v2         ; encoding: [0x7d,0x04,0xfa,0x7c]
 
 0x7e,0x04,0xfa,0x7c
-# W32: v_cmp_class_f16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0xfa,0x7c]
-# W64: v_cmp_class_f16_e32 vcc, exec_lo, v2    ; encoding: [0x7e,0x04,0xfa,0x7c]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, exec_lo, v2.l ; encoding: [0x7e,0x04,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, exec_lo, v2.l  ; encoding: [0x7e,0x04,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, exec_lo, v2    ; encoding: [0x7e,0x04,0xfa,0x7c]
 
 0x7f,0x04,0xfa,0x7c
-# W32: v_cmp_class_f16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0xfa,0x7c]
-# W64: v_cmp_class_f16_e32 vcc, exec_hi, v2    ; encoding: [0x7f,0x04,0xfa,0x7c]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, exec_hi, v2.l ; encoding: [0x7f,0x04,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, exec_hi, v2.l  ; encoding: [0x7f,0x04,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, exec_hi, v2    ; encoding: [0x7f,0x04,0xfa,0x7c]
 
 0x7c,0x04,0xfa,0x7c
-# W32: v_cmp_class_f16_e32 vcc_lo, null, v2    ; encoding: [0x7c,0x04,0xfa,0x7c]
-# W64: v_cmp_class_f16_e32 vcc, null, v2       ; encoding: [0x7c,0x04,0xfa,0x7c]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, null, v2.l  ; encoding: [0x7c,0x04,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, null, v2.l     ; encoding: [0x7c,0x04,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, null, v2    ; encoding: [0x7c,0x04,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, null, v2       ; encoding: [0x7c,0x04,0xfa,0x7c]
 
 0xc1,0x04,0xfa,0x7c
-# W32: v_cmp_class_f16_e32 vcc_lo, -1, v2      ; encoding: [0xc1,0x04,0xfa,0x7c]
-# W64: v_cmp_class_f16_e32 vcc, -1, v2         ; encoding: [0xc1,0x04,0xfa,0x7c]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, -1, v2.l    ; encoding: [0xc1,0x04,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, -1, v2.l       ; encoding: [0xc1,0x04,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, -1, v2      ; encoding: [0xc1,0x04,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, -1, v2         ; encoding: [0xc1,0x04,0xfa,0x7c]
 
 0xf0,0x04,0xfa,0x7c
-# W32: v_cmp_class_f16_e32 vcc_lo, 0.5, v2     ; encoding: [0xf0,0x04,0xfa,0x7c]
-# W64: v_cmp_class_f16_e32 vcc, 0.5, v2        ; encoding: [0xf0,0x04,0xfa,0x7c]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, 0.5, v2.l   ; encoding: [0xf0,0x04,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, 0.5, v2.l      ; encoding: [0xf0,0x04,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, 0.5, v2     ; encoding: [0xf0,0x04,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, 0.5, v2        ; encoding: [0xf0,0x04,0xfa,0x7c]
 
 0xfd,0x04,0xfa,0x7c
-# W32: v_cmp_class_f16_e32 vcc_lo, src_scc, v2 ; encoding: [0xfd,0x04,0xfa,0x7c]
-# W64: v_cmp_class_f16_e32 vcc, src_scc, v2    ; encoding: [0xfd,0x04,0xfa,0x7c]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, src_scc, v2.l ; encoding: [0xfd,0x04,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, src_scc, v2.l  ; encoding: [0xfd,0x04,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, src_scc, v2 ; encoding: [0xfd,0x04,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, src_scc, v2    ; encoding: [0xfd,0x04,0xfa,0x7c]
 
 0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_class_f16_e32 vcc_lo, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_class_f16_e32 vcc, 0xfe0b, v127   ; encoding: [0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, 0xfe0b, v127   ; encoding: [0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0xfa,0x7c
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, v1.h, v2.l  ; encoding: [0x81,0x05,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, v1.h, v2.l     ; encoding: [0x81,0x05,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0xfa,0x7c]
+
+0xff,0x05,0xfa,0x7c
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, v127.h, v2.l ; encoding: [0xff,0x05,0xfa,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, v127.h, v2.l   ; encoding: [0xff,0x05,0xfa,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0xfa,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0xfa,0x7c]
+
+0xfd,0x04,0xfb,0x7c
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, src_scc, v2.h ; encoding: [0xfd,0x04,0xfb,0x7c]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, src_scc, v2.h  ; encoding: [0xfd,0x04,0xfb,0x7c]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0xfb,0x7c]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0xfb,0x7c]
+
+0xff,0xfe,0xfb,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_class_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xfb,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_class_f16_e32 vcc, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xfb,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_class_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0xfb,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_class_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0xfb,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0xfc,0x7c
 # W32: v_cmp_class_f32_e32 vcc_lo, v1, v2      ; encoding: [0x01,0x05,0xfc,0x7c]
@@ -173,64 +227,118 @@
 # W64: v_cmp_class_f64_e32 vcc, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x7c,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x04,0x7c
-# W32: v_cmp_eq_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x04,0x7c]
-# W64: v_cmp_eq_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x04,0x7c]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, v1.l, v2.l     ; encoding: [0x01,0x05,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, v1.l, v2.l        ; encoding: [0x01,0x05,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x04,0x7c]
 
 0x7f,0x05,0x04,0x7c
-# W32: v_cmp_eq_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x04,0x7c]
-# W64: v_cmp_eq_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x04,0x7c]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, v127.l, v2.l   ; encoding: [0x7f,0x05,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, v127.l, v2.l      ; encoding: [0x7f,0x05,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x04,0x7c]
 
 0x01,0x04,0x04,0x7c
-# W32: v_cmp_eq_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x04,0x7c]
-# W64: v_cmp_eq_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x04,0x7c]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, s1, v2.l       ; encoding: [0x01,0x04,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, s1, v2.l          ; encoding: [0x01,0x04,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x04,0x7c]
 
 0x69,0x04,0x04,0x7c
-# W32: v_cmp_eq_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x04,0x7c]
-# W64: v_cmp_eq_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x04,0x7c]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, s105, v2.l     ; encoding: [0x69,0x04,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, s105, v2.l        ; encoding: [0x69,0x04,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x04,0x7c]
 
 0x6a,0x04,0x04,0x7c
-# W32: v_cmp_eq_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x04,0x7c]
-# W64: v_cmp_eq_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x04,0x7c]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, vcc_lo, v2.l   ; encoding: [0x6a,0x04,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, vcc_lo, v2.l      ; encoding: [0x6a,0x04,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x04,0x7c]
 
 0x6b,0x04,0x04,0x7c
-# W32: v_cmp_eq_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x04,0x7c]
-# W64: v_cmp_eq_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x04,0x7c]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, vcc_hi, v2.l   ; encoding: [0x6b,0x04,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, vcc_hi, v2.l      ; encoding: [0x6b,0x04,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x04,0x7c]
 
 0x7b,0x04,0x04,0x7c
-# W32: v_cmp_eq_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x04,0x7c]
-# W64: v_cmp_eq_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x04,0x7c]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, ttmp15, v2.l   ; encoding: [0x7b,0x04,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, ttmp15, v2.l      ; encoding: [0x7b,0x04,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x04,0x7c]
 
 0x7d,0x04,0x04,0x7c
-# W32: v_cmp_eq_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x04,0x7c]
-# W64: v_cmp_eq_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x04,0x7c]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, m0, v2.l       ; encoding: [0x7d,0x04,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, m0, v2.l          ; encoding: [0x7d,0x04,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x04,0x7c]
 
 0x7e,0x04,0x04,0x7c
-# W32: v_cmp_eq_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x04,0x7c]
-# W64: v_cmp_eq_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x04,0x7c]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, exec_lo, v2.l  ; encoding: [0x7e,0x04,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, exec_lo, v2.l     ; encoding: [0x7e,0x04,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x04,0x7c]
 
 0x7f,0x04,0x04,0x7c
-# W32: v_cmp_eq_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x04,0x7c]
-# W64: v_cmp_eq_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x04,0x7c]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, exec_hi, v2.l  ; encoding: [0x7f,0x04,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, exec_hi, v2.l     ; encoding: [0x7f,0x04,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x04,0x7c]
 
 0x7c,0x04,0x04,0x7c
-# W32: v_cmp_eq_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x04,0x7c]
-# W64: v_cmp_eq_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x04,0x7c]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, null, v2.l     ; encoding: [0x7c,0x04,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, null, v2.l        ; encoding: [0x7c,0x04,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x04,0x7c]
 
 0xc1,0x04,0x04,0x7c
-# W32: v_cmp_eq_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x04,0x7c]
-# W64: v_cmp_eq_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x04,0x7c]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, -1, v2.l       ; encoding: [0xc1,0x04,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, -1, v2.l          ; encoding: [0xc1,0x04,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x04,0x7c]
 
 0xf0,0x04,0x04,0x7c
-# W32: v_cmp_eq_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x04,0x7c]
-# W64: v_cmp_eq_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x04,0x7c]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, 0.5, v2.l      ; encoding: [0xf0,0x04,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, 0.5, v2.l         ; encoding: [0xf0,0x04,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x04,0x7c]
 
 0xfd,0x04,0x04,0x7c
-# W32: v_cmp_eq_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x04,0x7c]
-# W64: v_cmp_eq_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x04,0x7c]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, src_scc, v2.l  ; encoding: [0xfd,0x04,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, src_scc, v2.l     ; encoding: [0xfd,0x04,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x04,0x7c]
 
 0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_eq_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_eq_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, 0xfe0b, v127.l    ; encoding: [0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x04,0x7c
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, v1.h, v2.l     ; encoding: [0x81,0x05,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, v1.h, v2.l        ; encoding: [0x81,0x05,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x04,0x7c]
+
+0xff,0x05,0x04,0x7c
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, v127.h, v2.l   ; encoding: [0xff,0x05,0x04,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, v127.h, v2.l      ; encoding: [0xff,0x05,0x04,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x04,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x04,0x7c]
+
+0xfd,0x04,0x05,0x7c
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, src_scc, v2.h  ; encoding: [0xfd,0x04,0x05,0x7c]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, src_scc, v2.h     ; encoding: [0xfd,0x04,0x05,0x7c]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x05,0x7c]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x05,0x7c]
+
+0xff,0xfe,0x05,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_eq_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x05,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_eq_f16_e32 vcc, 0xfe0b, v127.h    ; encoding: [0xff,0xfe,0x05,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_eq_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x05,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_eq_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x05,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0x24,0x7c
 # W32: v_cmp_eq_f32_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x24,0x7c]
@@ -785,64 +893,118 @@
 # W64: v_cmp_eq_u64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xb5,0x7c,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x0c,0x7c
-# W32: v_cmp_ge_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x0c,0x7c]
-# W64: v_cmp_ge_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x0c,0x7c]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, v1.l, v2.l     ; encoding: [0x01,0x05,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, v1.l, v2.l        ; encoding: [0x01,0x05,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x0c,0x7c]
 
 0x7f,0x05,0x0c,0x7c
-# W32: v_cmp_ge_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x0c,0x7c]
-# W64: v_cmp_ge_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x0c,0x7c]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, v127.l, v2.l   ; encoding: [0x7f,0x05,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, v127.l, v2.l      ; encoding: [0x7f,0x05,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x0c,0x7c]
 
 0x01,0x04,0x0c,0x7c
-# W32: v_cmp_ge_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x0c,0x7c]
-# W64: v_cmp_ge_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x0c,0x7c]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, s1, v2.l       ; encoding: [0x01,0x04,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, s1, v2.l          ; encoding: [0x01,0x04,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x0c,0x7c]
 
 0x69,0x04,0x0c,0x7c
-# W32: v_cmp_ge_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x0c,0x7c]
-# W64: v_cmp_ge_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x0c,0x7c]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, s105, v2.l     ; encoding: [0x69,0x04,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, s105, v2.l        ; encoding: [0x69,0x04,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x0c,0x7c]
 
 0x6a,0x04,0x0c,0x7c
-# W32: v_cmp_ge_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x0c,0x7c]
-# W64: v_cmp_ge_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x0c,0x7c]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, vcc_lo, v2.l   ; encoding: [0x6a,0x04,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, vcc_lo, v2.l      ; encoding: [0x6a,0x04,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x0c,0x7c]
 
 0x6b,0x04,0x0c,0x7c
-# W32: v_cmp_ge_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x0c,0x7c]
-# W64: v_cmp_ge_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x0c,0x7c]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, vcc_hi, v2.l   ; encoding: [0x6b,0x04,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, vcc_hi, v2.l      ; encoding: [0x6b,0x04,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x0c,0x7c]
 
 0x7b,0x04,0x0c,0x7c
-# W32: v_cmp_ge_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x0c,0x7c]
-# W64: v_cmp_ge_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x0c,0x7c]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, ttmp15, v2.l   ; encoding: [0x7b,0x04,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, ttmp15, v2.l      ; encoding: [0x7b,0x04,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x0c,0x7c]
 
 0x7d,0x04,0x0c,0x7c
-# W32: v_cmp_ge_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x0c,0x7c]
-# W64: v_cmp_ge_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x0c,0x7c]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, m0, v2.l       ; encoding: [0x7d,0x04,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, m0, v2.l          ; encoding: [0x7d,0x04,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x0c,0x7c]
 
 0x7e,0x04,0x0c,0x7c
-# W32: v_cmp_ge_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x0c,0x7c]
-# W64: v_cmp_ge_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x0c,0x7c]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, exec_lo, v2.l  ; encoding: [0x7e,0x04,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, exec_lo, v2.l     ; encoding: [0x7e,0x04,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x0c,0x7c]
 
 0x7f,0x04,0x0c,0x7c
-# W32: v_cmp_ge_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x0c,0x7c]
-# W64: v_cmp_ge_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x0c,0x7c]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, exec_hi, v2.l  ; encoding: [0x7f,0x04,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, exec_hi, v2.l     ; encoding: [0x7f,0x04,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x0c,0x7c]
 
 0x7c,0x04,0x0c,0x7c
-# W32: v_cmp_ge_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x0c,0x7c]
-# W64: v_cmp_ge_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x0c,0x7c]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, null, v2.l     ; encoding: [0x7c,0x04,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, null, v2.l        ; encoding: [0x7c,0x04,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x0c,0x7c]
 
 0xc1,0x04,0x0c,0x7c
-# W32: v_cmp_ge_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x0c,0x7c]
-# W64: v_cmp_ge_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x0c,0x7c]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, -1, v2.l       ; encoding: [0xc1,0x04,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, -1, v2.l          ; encoding: [0xc1,0x04,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x0c,0x7c]
 
 0xf0,0x04,0x0c,0x7c
-# W32: v_cmp_ge_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x0c,0x7c]
-# W64: v_cmp_ge_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x0c,0x7c]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, 0.5, v2.l      ; encoding: [0xf0,0x04,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, 0.5, v2.l         ; encoding: [0xf0,0x04,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x0c,0x7c]
 
 0xfd,0x04,0x0c,0x7c
-# W32: v_cmp_ge_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x0c,0x7c]
-# W64: v_cmp_ge_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x0c,0x7c]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, src_scc, v2.l  ; encoding: [0xfd,0x04,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, src_scc, v2.l     ; encoding: [0xfd,0x04,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x0c,0x7c]
 
 0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_ge_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_ge_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, 0xfe0b, v127.l    ; encoding: [0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x0c,0x7c
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, v1.h, v2.l     ; encoding: [0x81,0x05,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, v1.h, v2.l        ; encoding: [0x81,0x05,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x0c,0x7c]
+
+0xff,0x05,0x0c,0x7c
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, v127.h, v2.l   ; encoding: [0xff,0x05,0x0c,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, v127.h, v2.l      ; encoding: [0xff,0x05,0x0c,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x0c,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x0c,0x7c]
+
+0xfd,0x04,0x0d,0x7c
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, src_scc, v2.h  ; encoding: [0xfd,0x04,0x0d,0x7c]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, src_scc, v2.h     ; encoding: [0xfd,0x04,0x0d,0x7c]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x0d,0x7c]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x0d,0x7c]
+
+0xff,0xfe,0x0d,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_ge_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x0d,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_ge_f16_e32 vcc, 0xfe0b, v127.h    ; encoding: [0xff,0xfe,0x0d,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_ge_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x0d,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_ge_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x0d,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0x2c,0x7c
 # W32: v_cmp_ge_f32_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x2c,0x7c]
@@ -1397,64 +1559,118 @@
 # W64: v_cmp_ge_u64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xbd,0x7c,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x08,0x7c
-# W32: v_cmp_gt_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x08,0x7c]
-# W64: v_cmp_gt_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x08,0x7c]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, v1.l, v2.l     ; encoding: [0x01,0x05,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, v1.l, v2.l        ; encoding: [0x01,0x05,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x08,0x7c]
 
 0x7f,0x05,0x08,0x7c
-# W32: v_cmp_gt_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x08,0x7c]
-# W64: v_cmp_gt_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x08,0x7c]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, v127.l, v2.l   ; encoding: [0x7f,0x05,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, v127.l, v2.l      ; encoding: [0x7f,0x05,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x08,0x7c]
 
 0x01,0x04,0x08,0x7c
-# W32: v_cmp_gt_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x08,0x7c]
-# W64: v_cmp_gt_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x08,0x7c]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, s1, v2.l       ; encoding: [0x01,0x04,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, s1, v2.l          ; encoding: [0x01,0x04,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x08,0x7c]
 
 0x69,0x04,0x08,0x7c
-# W32: v_cmp_gt_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x08,0x7c]
-# W64: v_cmp_gt_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x08,0x7c]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, s105, v2.l     ; encoding: [0x69,0x04,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, s105, v2.l        ; encoding: [0x69,0x04,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x08,0x7c]
 
 0x6a,0x04,0x08,0x7c
-# W32: v_cmp_gt_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x08,0x7c]
-# W64: v_cmp_gt_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x08,0x7c]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, vcc_lo, v2.l   ; encoding: [0x6a,0x04,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, vcc_lo, v2.l      ; encoding: [0x6a,0x04,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x08,0x7c]
 
 0x6b,0x04,0x08,0x7c
-# W32: v_cmp_gt_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x08,0x7c]
-# W64: v_cmp_gt_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x08,0x7c]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, vcc_hi, v2.l   ; encoding: [0x6b,0x04,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, vcc_hi, v2.l      ; encoding: [0x6b,0x04,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x08,0x7c]
 
 0x7b,0x04,0x08,0x7c
-# W32: v_cmp_gt_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x08,0x7c]
-# W64: v_cmp_gt_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x08,0x7c]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, ttmp15, v2.l   ; encoding: [0x7b,0x04,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, ttmp15, v2.l      ; encoding: [0x7b,0x04,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x08,0x7c]
 
 0x7d,0x04,0x08,0x7c
-# W32: v_cmp_gt_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x08,0x7c]
-# W64: v_cmp_gt_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x08,0x7c]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, m0, v2.l       ; encoding: [0x7d,0x04,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, m0, v2.l          ; encoding: [0x7d,0x04,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x08,0x7c]
 
 0x7e,0x04,0x08,0x7c
-# W32: v_cmp_gt_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x08,0x7c]
-# W64: v_cmp_gt_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x08,0x7c]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, exec_lo, v2.l  ; encoding: [0x7e,0x04,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, exec_lo, v2.l     ; encoding: [0x7e,0x04,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x08,0x7c]
 
 0x7f,0x04,0x08,0x7c
-# W32: v_cmp_gt_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x08,0x7c]
-# W64: v_cmp_gt_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x08,0x7c]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, exec_hi, v2.l  ; encoding: [0x7f,0x04,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, exec_hi, v2.l     ; encoding: [0x7f,0x04,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x08,0x7c]
 
 0x7c,0x04,0x08,0x7c
-# W32: v_cmp_gt_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x08,0x7c]
-# W64: v_cmp_gt_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x08,0x7c]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, null, v2.l     ; encoding: [0x7c,0x04,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, null, v2.l        ; encoding: [0x7c,0x04,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x08,0x7c]
 
 0xc1,0x04,0x08,0x7c
-# W32: v_cmp_gt_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x08,0x7c]
-# W64: v_cmp_gt_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x08,0x7c]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, -1, v2.l       ; encoding: [0xc1,0x04,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, -1, v2.l          ; encoding: [0xc1,0x04,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x08,0x7c]
 
 0xf0,0x04,0x08,0x7c
-# W32: v_cmp_gt_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x08,0x7c]
-# W64: v_cmp_gt_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x08,0x7c]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, 0.5, v2.l      ; encoding: [0xf0,0x04,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, 0.5, v2.l         ; encoding: [0xf0,0x04,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x08,0x7c]
 
 0xfd,0x04,0x08,0x7c
-# W32: v_cmp_gt_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x08,0x7c]
-# W64: v_cmp_gt_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x08,0x7c]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, src_scc, v2.l  ; encoding: [0xfd,0x04,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, src_scc, v2.l     ; encoding: [0xfd,0x04,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x08,0x7c]
 
 0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_gt_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_gt_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, 0xfe0b, v127.l    ; encoding: [0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x08,0x7c
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, v1.h, v2.l     ; encoding: [0x81,0x05,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, v1.h, v2.l        ; encoding: [0x81,0x05,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x08,0x7c]
+
+0xff,0x05,0x08,0x7c
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, v127.h, v2.l   ; encoding: [0xff,0x05,0x08,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, v127.h, v2.l      ; encoding: [0xff,0x05,0x08,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x08,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x08,0x7c]
+
+0xfd,0x04,0x09,0x7c
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, src_scc, v2.h  ; encoding: [0xfd,0x04,0x09,0x7c]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, src_scc, v2.h     ; encoding: [0xfd,0x04,0x09,0x7c]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x09,0x7c]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x09,0x7c]
+
+0xff,0xfe,0x09,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_gt_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x09,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_gt_f16_e32 vcc, 0xfe0b, v127.h    ; encoding: [0xff,0xfe,0x09,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_gt_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x09,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_gt_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x09,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0x28,0x7c
 # W32: v_cmp_gt_f32_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x28,0x7c]
@@ -2009,64 +2225,118 @@
 # W64: v_cmp_gt_u64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xb9,0x7c,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x06,0x7c
-# W32: v_cmp_le_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x06,0x7c]
-# W64: v_cmp_le_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x06,0x7c]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, v1.l, v2.l     ; encoding: [0x01,0x05,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, v1.l, v2.l        ; encoding: [0x01,0x05,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x06,0x7c]
 
 0x7f,0x05,0x06,0x7c
-# W32: v_cmp_le_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x06,0x7c]
-# W64: v_cmp_le_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x06,0x7c]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, v127.l, v2.l   ; encoding: [0x7f,0x05,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, v127.l, v2.l      ; encoding: [0x7f,0x05,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x06,0x7c]
 
 0x01,0x04,0x06,0x7c
-# W32: v_cmp_le_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x06,0x7c]
-# W64: v_cmp_le_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x06,0x7c]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, s1, v2.l       ; encoding: [0x01,0x04,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, s1, v2.l          ; encoding: [0x01,0x04,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x06,0x7c]
 
 0x69,0x04,0x06,0x7c
-# W32: v_cmp_le_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x06,0x7c]
-# W64: v_cmp_le_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x06,0x7c]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, s105, v2.l     ; encoding: [0x69,0x04,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, s105, v2.l        ; encoding: [0x69,0x04,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x06,0x7c]
 
 0x6a,0x04,0x06,0x7c
-# W32: v_cmp_le_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x06,0x7c]
-# W64: v_cmp_le_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x06,0x7c]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, vcc_lo, v2.l   ; encoding: [0x6a,0x04,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, vcc_lo, v2.l      ; encoding: [0x6a,0x04,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x06,0x7c]
 
 0x6b,0x04,0x06,0x7c
-# W32: v_cmp_le_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x06,0x7c]
-# W64: v_cmp_le_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x06,0x7c]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, vcc_hi, v2.l   ; encoding: [0x6b,0x04,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, vcc_hi, v2.l      ; encoding: [0x6b,0x04,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x06,0x7c]
 
 0x7b,0x04,0x06,0x7c
-# W32: v_cmp_le_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x06,0x7c]
-# W64: v_cmp_le_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x06,0x7c]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, ttmp15, v2.l   ; encoding: [0x7b,0x04,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, ttmp15, v2.l      ; encoding: [0x7b,0x04,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x06,0x7c]
 
 0x7d,0x04,0x06,0x7c
-# W32: v_cmp_le_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x06,0x7c]
-# W64: v_cmp_le_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x06,0x7c]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, m0, v2.l       ; encoding: [0x7d,0x04,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, m0, v2.l          ; encoding: [0x7d,0x04,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x06,0x7c]
 
 0x7e,0x04,0x06,0x7c
-# W32: v_cmp_le_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x06,0x7c]
-# W64: v_cmp_le_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x06,0x7c]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, exec_lo, v2.l  ; encoding: [0x7e,0x04,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, exec_lo, v2.l     ; encoding: [0x7e,0x04,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x06,0x7c]
 
 0x7f,0x04,0x06,0x7c
-# W32: v_cmp_le_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x06,0x7c]
-# W64: v_cmp_le_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x06,0x7c]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, exec_hi, v2.l  ; encoding: [0x7f,0x04,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, exec_hi, v2.l     ; encoding: [0x7f,0x04,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x06,0x7c]
 
 0x7c,0x04,0x06,0x7c
-# W32: v_cmp_le_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x06,0x7c]
-# W64: v_cmp_le_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x06,0x7c]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, null, v2.l     ; encoding: [0x7c,0x04,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, null, v2.l        ; encoding: [0x7c,0x04,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x06,0x7c]
 
 0xc1,0x04,0x06,0x7c
-# W32: v_cmp_le_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x06,0x7c]
-# W64: v_cmp_le_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x06,0x7c]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, -1, v2.l       ; encoding: [0xc1,0x04,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, -1, v2.l          ; encoding: [0xc1,0x04,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x06,0x7c]
 
 0xf0,0x04,0x06,0x7c
-# W32: v_cmp_le_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x06,0x7c]
-# W64: v_cmp_le_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x06,0x7c]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, 0.5, v2.l      ; encoding: [0xf0,0x04,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, 0.5, v2.l         ; encoding: [0xf0,0x04,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x06,0x7c]
 
 0xfd,0x04,0x06,0x7c
-# W32: v_cmp_le_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x06,0x7c]
-# W64: v_cmp_le_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x06,0x7c]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, src_scc, v2.l  ; encoding: [0xfd,0x04,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, src_scc, v2.l     ; encoding: [0xfd,0x04,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x06,0x7c]
 
 0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_le_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_le_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, 0xfe0b, v127.l    ; encoding: [0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x06,0x7c
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, v1.h, v2.l     ; encoding: [0x81,0x05,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, v1.h, v2.l        ; encoding: [0x81,0x05,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x06,0x7c]
+
+0xff,0x05,0x06,0x7c
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, v127.h, v2.l   ; encoding: [0xff,0x05,0x06,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, v127.h, v2.l      ; encoding: [0xff,0x05,0x06,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x06,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x06,0x7c]
+
+0xfd,0x04,0x07,0x7c
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, src_scc, v2.h  ; encoding: [0xfd,0x04,0x07,0x7c]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, src_scc, v2.h     ; encoding: [0xfd,0x04,0x07,0x7c]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x07,0x7c]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x07,0x7c]
+
+0xff,0xfe,0x07,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_le_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x07,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_le_f16_e32 vcc, 0xfe0b, v127.h    ; encoding: [0xff,0xfe,0x07,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_le_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x07,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_le_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x07,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0x26,0x7c
 # W32: v_cmp_le_f32_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x26,0x7c]
@@ -2621,64 +2891,118 @@
 # W64: v_cmp_le_u64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xb7,0x7c,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x0a,0x7c
-# W32: v_cmp_lg_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x0a,0x7c]
-# W64: v_cmp_lg_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x0a,0x7c]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, v1.l, v2.l     ; encoding: [0x01,0x05,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, v1.l, v2.l        ; encoding: [0x01,0x05,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, v1, v2            ; encoding: [0x01,0x05,0x0a,0x7c]
 
 0x7f,0x05,0x0a,0x7c
-# W32: v_cmp_lg_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x0a,0x7c]
-# W64: v_cmp_lg_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x0a,0x7c]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, v127.l, v2.l   ; encoding: [0x7f,0x05,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, v127.l, v2.l      ; encoding: [0x7f,0x05,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, v127, v2       ; encoding: [0x7f,0x05,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, v127, v2          ; encoding: [0x7f,0x05,0x0a,0x7c]
 
 0x01,0x04,0x0a,0x7c
-# W32: v_cmp_lg_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x0a,0x7c]
-# W64: v_cmp_lg_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x0a,0x7c]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, s1, v2.l       ; encoding: [0x01,0x04,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, s1, v2.l          ; encoding: [0x01,0x04,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, s1, v2         ; encoding: [0x01,0x04,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, s1, v2            ; encoding: [0x01,0x04,0x0a,0x7c]
 
 0x69,0x04,0x0a,0x7c
-# W32: v_cmp_lg_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x0a,0x7c]
-# W64: v_cmp_lg_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x0a,0x7c]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, s105, v2.l     ; encoding: [0x69,0x04,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, s105, v2.l        ; encoding: [0x69,0x04,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, s105, v2       ; encoding: [0x69,0x04,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, s105, v2          ; encoding: [0x69,0x04,0x0a,0x7c]
 
 0x6a,0x04,0x0a,0x7c
-# W32: v_cmp_lg_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x0a,0x7c]
-# W64: v_cmp_lg_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x0a,0x7c]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, vcc_lo, v2.l   ; encoding: [0x6a,0x04,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, vcc_lo, v2.l      ; encoding: [0x6a,0x04,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, vcc_lo, v2     ; encoding: [0x6a,0x04,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, vcc_lo, v2        ; encoding: [0x6a,0x04,0x0a,0x7c]
 
 0x6b,0x04,0x0a,0x7c
-# W32: v_cmp_lg_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x0a,0x7c]
-# W64: v_cmp_lg_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x0a,0x7c]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, vcc_hi, v2.l   ; encoding: [0x6b,0x04,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, vcc_hi, v2.l      ; encoding: [0x6b,0x04,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, vcc_hi, v2     ; encoding: [0x6b,0x04,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, vcc_hi, v2        ; encoding: [0x6b,0x04,0x0a,0x7c]
 
 0x7b,0x04,0x0a,0x7c
-# W32: v_cmp_lg_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x0a,0x7c]
-# W64: v_cmp_lg_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x0a,0x7c]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, ttmp15, v2.l   ; encoding: [0x7b,0x04,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, ttmp15, v2.l      ; encoding: [0x7b,0x04,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, ttmp15, v2     ; encoding: [0x7b,0x04,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, ttmp15, v2        ; encoding: [0x7b,0x04,0x0a,0x7c]
 
 0x7d,0x04,0x0a,0x7c
-# W32: v_cmp_lg_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x0a,0x7c]
-# W64: v_cmp_lg_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x0a,0x7c]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, m0, v2.l       ; encoding: [0x7d,0x04,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, m0, v2.l          ; encoding: [0x7d,0x04,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, m0, v2         ; encoding: [0x7d,0x04,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, m0, v2            ; encoding: [0x7d,0x04,0x0a,0x7c]
 
 0x7e,0x04,0x0a,0x7c
-# W32: v_cmp_lg_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x0a,0x7c]
-# W64: v_cmp_lg_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x0a,0x7c]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, exec_lo, v2.l  ; encoding: [0x7e,0x04,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, exec_lo, v2.l     ; encoding: [0x7e,0x04,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, exec_lo, v2    ; encoding: [0x7e,0x04,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, exec_lo, v2       ; encoding: [0x7e,0x04,0x0a,0x7c]
 
 0x7f,0x04,0x0a,0x7c
-# W32: v_cmp_lg_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x0a,0x7c]
-# W64: v_cmp_lg_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x0a,0x7c]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, exec_hi, v2.l  ; encoding: [0x7f,0x04,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, exec_hi, v2.l     ; encoding: [0x7f,0x04,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, exec_hi, v2    ; encoding: [0x7f,0x04,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, exec_hi, v2       ; encoding: [0x7f,0x04,0x0a,0x7c]
 
 0x7c,0x04,0x0a,0x7c
-# W32: v_cmp_lg_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x0a,0x7c]
-# W64: v_cmp_lg_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x0a,0x7c]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, null, v2.l     ; encoding: [0x7c,0x04,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, null, v2.l        ; encoding: [0x7c,0x04,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, null, v2       ; encoding: [0x7c,0x04,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, null, v2          ; encoding: [0x7c,0x04,0x0a,0x7c]
 
 0xc1,0x04,0x0a,0x7c
-# W32: v_cmp_lg_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x0a,0x7c]
-# W64: v_cmp_lg_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x0a,0x7c]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, -1, v2.l       ; encoding: [0xc1,0x04,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, -1, v2.l          ; encoding: [0xc1,0x04,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, -1, v2         ; encoding: [0xc1,0x04,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, -1, v2            ; encoding: [0xc1,0x04,0x0a,0x7c]
 
 0xf0,0x04,0x0a,0x7c
-# W32: v_cmp_lg_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x0a,0x7c]
-# W64: v_cmp_lg_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x0a,0x7c]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, 0.5, v2.l      ; encoding: [0xf0,0x04,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, 0.5, v2.l         ; encoding: [0xf0,0x04,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, 0.5, v2        ; encoding: [0xf0,0x04,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, 0.5, v2           ; encoding: [0xf0,0x04,0x0a,0x7c]
 
 0xfd,0x04,0x0a,0x7c
-# W32: v_cmp_lg_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x0a,0x7c]
-# W64: v_cmp_lg_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x0a,0x7c]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, src_scc, v2.l  ; encoding: [0xfd,0x04,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, src_scc, v2.l     ; encoding: [0xfd,0x04,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, src_scc, v2    ; encoding: [0xfd,0x04,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, src_scc, v2       ; encoding: [0xfd,0x04,0x0a,0x7c]
 
 0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_lg_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_lg_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, 0xfe0b, v127.l    ; encoding: [0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, 0xfe0b, v127   ; encoding: [0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, 0xfe0b, v127      ; encoding: [0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x0a,0x7c
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, v1.h, v2.l     ; encoding: [0x81,0x05,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, v1.h, v2.l        ; encoding: [0x81,0x05,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x0a,0x7c]
+
+0xff,0x05,0x0a,0x7c
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, v127.h, v2.l   ; encoding: [0xff,0x05,0x0a,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, v127.h, v2.l      ; encoding: [0xff,0x05,0x0a,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x0a,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x0a,0x7c]
+
+0xfd,0x04,0x0b,0x7c
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, src_scc, v2.h  ; encoding: [0xfd,0x04,0x0b,0x7c]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, src_scc, v2.h     ; encoding: [0xfd,0x04,0x0b,0x7c]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x0b,0x7c]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x0b,0x7c]
+
+0xff,0xfe,0x0b,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_lg_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x0b,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_lg_f16_e32 vcc, 0xfe0b, v127.h    ; encoding: [0xff,0xfe,0x0b,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_lg_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x0b,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_lg_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x0b,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0x2a,0x7c
 # W32: v_cmp_lg_f32_e32 vcc_lo, v1, v2         ; encoding: [0x01,0x05,0x2a,0x7c]
@@ -3899,64 +4223,118 @@
 # W64: v_cmp_ne_u64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xbb,0x7c,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x1a,0x7c
-# W32: v_cmp_neq_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x1a,0x7c]
-# W64: v_cmp_neq_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x1a,0x7c]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, v1.l, v2.l    ; encoding: [0x01,0x05,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, v1.l, v2.l       ; encoding: [0x01,0x05,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x1a,0x7c]
 
 0x7f,0x05,0x1a,0x7c
-# W32: v_cmp_neq_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x1a,0x7c]
-# W64: v_cmp_neq_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x1a,0x7c]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, v127.l, v2.l  ; encoding: [0x7f,0x05,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, v127.l, v2.l     ; encoding: [0x7f,0x05,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x1a,0x7c]
 
 0x01,0x04,0x1a,0x7c
-# W32: v_cmp_neq_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x1a,0x7c]
-# W64: v_cmp_neq_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x1a,0x7c]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, s1, v2.l      ; encoding: [0x01,0x04,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, s1, v2.l         ; encoding: [0x01,0x04,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x1a,0x7c]
 
 0x69,0x04,0x1a,0x7c
-# W32: v_cmp_neq_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x1a,0x7c]
-# W64: v_cmp_neq_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x1a,0x7c]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, s105, v2.l    ; encoding: [0x69,0x04,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, s105, v2.l       ; encoding: [0x69,0x04,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x1a,0x7c]
 
 0x6a,0x04,0x1a,0x7c
-# W32: v_cmp_neq_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x1a,0x7c]
-# W64: v_cmp_neq_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x1a,0x7c]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, vcc_lo, v2.l  ; encoding: [0x6a,0x04,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, vcc_lo, v2.l     ; encoding: [0x6a,0x04,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x1a,0x7c]
 
 0x6b,0x04,0x1a,0x7c
-# W32: v_cmp_neq_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x1a,0x7c]
-# W64: v_cmp_neq_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x1a,0x7c]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, vcc_hi, v2.l  ; encoding: [0x6b,0x04,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, vcc_hi, v2.l     ; encoding: [0x6b,0x04,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x1a,0x7c]
 
 0x7b,0x04,0x1a,0x7c
-# W32: v_cmp_neq_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x1a,0x7c]
-# W64: v_cmp_neq_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x1a,0x7c]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, ttmp15, v2.l  ; encoding: [0x7b,0x04,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, ttmp15, v2.l     ; encoding: [0x7b,0x04,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x1a,0x7c]
 
 0x7d,0x04,0x1a,0x7c
-# W32: v_cmp_neq_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x1a,0x7c]
-# W64: v_cmp_neq_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x1a,0x7c]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, m0, v2.l      ; encoding: [0x7d,0x04,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, m0, v2.l         ; encoding: [0x7d,0x04,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x1a,0x7c]
 
 0x7e,0x04,0x1a,0x7c
-# W32: v_cmp_neq_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x1a,0x7c]
-# W64: v_cmp_neq_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x1a,0x7c]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, exec_lo, v2.l ; encoding: [0x7e,0x04,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, exec_lo, v2.l    ; encoding: [0x7e,0x04,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x1a,0x7c]
 
 0x7f,0x04,0x1a,0x7c
-# W32: v_cmp_neq_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x1a,0x7c]
-# W64: v_cmp_neq_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x1a,0x7c]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, exec_hi, v2.l ; encoding: [0x7f,0x04,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, exec_hi, v2.l    ; encoding: [0x7f,0x04,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x1a,0x7c]
 
 0x7c,0x04,0x1a,0x7c
-# W32: v_cmp_neq_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x1a,0x7c]
-# W64: v_cmp_neq_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x1a,0x7c]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, null, v2.l    ; encoding: [0x7c,0x04,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, null, v2.l       ; encoding: [0x7c,0x04,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x1a,0x7c]
 
 0xc1,0x04,0x1a,0x7c
-# W32: v_cmp_neq_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x1a,0x7c]
-# W64: v_cmp_neq_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x1a,0x7c]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, -1, v2.l      ; encoding: [0xc1,0x04,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, -1, v2.l         ; encoding: [0xc1,0x04,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x1a,0x7c]
 
 0xf0,0x04,0x1a,0x7c
-# W32: v_cmp_neq_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x1a,0x7c]
-# W64: v_cmp_neq_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x1a,0x7c]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, 0.5, v2.l     ; encoding: [0xf0,0x04,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, 0.5, v2.l        ; encoding: [0xf0,0x04,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x1a,0x7c]
 
 0xfd,0x04,0x1a,0x7c
-# W32: v_cmp_neq_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x1a,0x7c]
-# W64: v_cmp_neq_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x1a,0x7c]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, src_scc, v2.l ; encoding: [0xfd,0x04,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, src_scc, v2.l    ; encoding: [0xfd,0x04,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x1a,0x7c]
 
 0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_neq_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_neq_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, 0xfe0b, v127.l   ; encoding: [0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x1a,0x7c
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, v1.h, v2.l    ; encoding: [0x81,0x05,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, v1.h, v2.l       ; encoding: [0x81,0x05,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x1a,0x7c]
+
+0xff,0x05,0x1a,0x7c
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, v127.h, v2.l  ; encoding: [0xff,0x05,0x1a,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, v127.h, v2.l     ; encoding: [0xff,0x05,0x1a,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x1a,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x1a,0x7c]
+
+0xfd,0x04,0x1b,0x7c
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, src_scc, v2.h ; encoding: [0xfd,0x04,0x1b,0x7c]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, src_scc, v2.h    ; encoding: [0xfd,0x04,0x1b,0x7c]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x1b,0x7c]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x1b,0x7c]
+
+0xff,0xfe,0x1b,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_neq_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x1b,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_neq_f16_e32 vcc, 0xfe0b, v127.h   ; encoding: [0xff,0xfe,0x1b,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_neq_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x1b,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_neq_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x1b,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0x3a,0x7c
 # W32: v_cmp_neq_f32_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x3a,0x7c]
@@ -4067,64 +4445,118 @@
 # W64: v_cmp_neq_f64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x5b,0x7c,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x12,0x7c
-# W32: v_cmp_nge_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x12,0x7c]
-# W64: v_cmp_nge_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x12,0x7c]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, v1.l, v2.l    ; encoding: [0x01,0x05,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, v1.l, v2.l       ; encoding: [0x01,0x05,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x12,0x7c]
 
 0x7f,0x05,0x12,0x7c
-# W32: v_cmp_nge_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x12,0x7c]
-# W64: v_cmp_nge_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x12,0x7c]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, v127.l, v2.l  ; encoding: [0x7f,0x05,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, v127.l, v2.l     ; encoding: [0x7f,0x05,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x12,0x7c]
 
 0x01,0x04,0x12,0x7c
-# W32: v_cmp_nge_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x12,0x7c]
-# W64: v_cmp_nge_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x12,0x7c]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, s1, v2.l      ; encoding: [0x01,0x04,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, s1, v2.l         ; encoding: [0x01,0x04,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x12,0x7c]
 
 0x69,0x04,0x12,0x7c
-# W32: v_cmp_nge_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x12,0x7c]
-# W64: v_cmp_nge_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x12,0x7c]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, s105, v2.l    ; encoding: [0x69,0x04,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, s105, v2.l       ; encoding: [0x69,0x04,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x12,0x7c]
 
 0x6a,0x04,0x12,0x7c
-# W32: v_cmp_nge_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x12,0x7c]
-# W64: v_cmp_nge_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x12,0x7c]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, vcc_lo, v2.l  ; encoding: [0x6a,0x04,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, vcc_lo, v2.l     ; encoding: [0x6a,0x04,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x12,0x7c]
 
 0x6b,0x04,0x12,0x7c
-# W32: v_cmp_nge_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x12,0x7c]
-# W64: v_cmp_nge_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x12,0x7c]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, vcc_hi, v2.l  ; encoding: [0x6b,0x04,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, vcc_hi, v2.l     ; encoding: [0x6b,0x04,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x12,0x7c]
 
 0x7b,0x04,0x12,0x7c
-# W32: v_cmp_nge_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x12,0x7c]
-# W64: v_cmp_nge_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x12,0x7c]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, ttmp15, v2.l  ; encoding: [0x7b,0x04,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, ttmp15, v2.l     ; encoding: [0x7b,0x04,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x12,0x7c]
 
 0x7d,0x04,0x12,0x7c
-# W32: v_cmp_nge_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x12,0x7c]
-# W64: v_cmp_nge_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x12,0x7c]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, m0, v2.l      ; encoding: [0x7d,0x04,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, m0, v2.l         ; encoding: [0x7d,0x04,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x12,0x7c]
 
 0x7e,0x04,0x12,0x7c
-# W32: v_cmp_nge_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x12,0x7c]
-# W64: v_cmp_nge_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x12,0x7c]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, exec_lo, v2.l ; encoding: [0x7e,0x04,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, exec_lo, v2.l    ; encoding: [0x7e,0x04,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x12,0x7c]
 
 0x7f,0x04,0x12,0x7c
-# W32: v_cmp_nge_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x12,0x7c]
-# W64: v_cmp_nge_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x12,0x7c]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, exec_hi, v2.l ; encoding: [0x7f,0x04,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, exec_hi, v2.l    ; encoding: [0x7f,0x04,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x12,0x7c]
 
 0x7c,0x04,0x12,0x7c
-# W32: v_cmp_nge_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x12,0x7c]
-# W64: v_cmp_nge_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x12,0x7c]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, null, v2.l    ; encoding: [0x7c,0x04,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, null, v2.l       ; encoding: [0x7c,0x04,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x12,0x7c]
 
 0xc1,0x04,0x12,0x7c
-# W32: v_cmp_nge_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x12,0x7c]
-# W64: v_cmp_nge_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x12,0x7c]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, -1, v2.l      ; encoding: [0xc1,0x04,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, -1, v2.l         ; encoding: [0xc1,0x04,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x12,0x7c]
 
 0xf0,0x04,0x12,0x7c
-# W32: v_cmp_nge_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x12,0x7c]
-# W64: v_cmp_nge_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x12,0x7c]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, 0.5, v2.l     ; encoding: [0xf0,0x04,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, 0.5, v2.l        ; encoding: [0xf0,0x04,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x12,0x7c]
 
 0xfd,0x04,0x12,0x7c
-# W32: v_cmp_nge_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x12,0x7c]
-# W64: v_cmp_nge_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x12,0x7c]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, src_scc, v2.l ; encoding: [0xfd,0x04,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, src_scc, v2.l    ; encoding: [0xfd,0x04,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x12,0x7c]
 
 0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_nge_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_nge_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, 0xfe0b, v127.l   ; encoding: [0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x12,0x7c
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, v1.h, v2.l    ; encoding: [0x81,0x05,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, v1.h, v2.l       ; encoding: [0x81,0x05,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x12,0x7c]
+
+0xff,0x05,0x12,0x7c
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, v127.h, v2.l  ; encoding: [0xff,0x05,0x12,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, v127.h, v2.l     ; encoding: [0xff,0x05,0x12,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x12,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x12,0x7c]
+
+0xfd,0x04,0x13,0x7c
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, src_scc, v2.h ; encoding: [0xfd,0x04,0x13,0x7c]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, src_scc, v2.h    ; encoding: [0xfd,0x04,0x13,0x7c]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x13,0x7c]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x13,0x7c]
+
+0xff,0xfe,0x13,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_nge_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x13,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_nge_f16_e32 vcc, 0xfe0b, v127.h   ; encoding: [0xff,0xfe,0x13,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_nge_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x13,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_nge_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x13,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0x32,0x7c
 # W32: v_cmp_nge_f32_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x32,0x7c]
@@ -4235,64 +4667,118 @@
 # W64: v_cmp_nge_f64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x53,0x7c,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x16,0x7c
-# W32: v_cmp_ngt_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x16,0x7c]
-# W64: v_cmp_ngt_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x16,0x7c]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, v1.l, v2.l    ; encoding: [0x01,0x05,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, v1.l, v2.l       ; encoding: [0x01,0x05,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x16,0x7c]
 
 0x7f,0x05,0x16,0x7c
-# W32: v_cmp_ngt_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x16,0x7c]
-# W64: v_cmp_ngt_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x16,0x7c]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, v127.l, v2.l  ; encoding: [0x7f,0x05,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, v127.l, v2.l     ; encoding: [0x7f,0x05,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x16,0x7c]
 
 0x01,0x04,0x16,0x7c
-# W32: v_cmp_ngt_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x16,0x7c]
-# W64: v_cmp_ngt_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x16,0x7c]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, s1, v2.l      ; encoding: [0x01,0x04,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, s1, v2.l         ; encoding: [0x01,0x04,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x16,0x7c]
 
 0x69,0x04,0x16,0x7c
-# W32: v_cmp_ngt_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x16,0x7c]
-# W64: v_cmp_ngt_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x16,0x7c]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, s105, v2.l    ; encoding: [0x69,0x04,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, s105, v2.l       ; encoding: [0x69,0x04,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x16,0x7c]
 
 0x6a,0x04,0x16,0x7c
-# W32: v_cmp_ngt_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x16,0x7c]
-# W64: v_cmp_ngt_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x16,0x7c]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, vcc_lo, v2.l  ; encoding: [0x6a,0x04,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, vcc_lo, v2.l     ; encoding: [0x6a,0x04,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x16,0x7c]
 
 0x6b,0x04,0x16,0x7c
-# W32: v_cmp_ngt_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x16,0x7c]
-# W64: v_cmp_ngt_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x16,0x7c]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, vcc_hi, v2.l  ; encoding: [0x6b,0x04,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, vcc_hi, v2.l     ; encoding: [0x6b,0x04,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x16,0x7c]
 
 0x7b,0x04,0x16,0x7c
-# W32: v_cmp_ngt_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x16,0x7c]
-# W64: v_cmp_ngt_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x16,0x7c]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, ttmp15, v2.l  ; encoding: [0x7b,0x04,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, ttmp15, v2.l     ; encoding: [0x7b,0x04,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x16,0x7c]
 
 0x7d,0x04,0x16,0x7c
-# W32: v_cmp_ngt_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x16,0x7c]
-# W64: v_cmp_ngt_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x16,0x7c]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, m0, v2.l      ; encoding: [0x7d,0x04,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, m0, v2.l         ; encoding: [0x7d,0x04,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x16,0x7c]
 
 0x7e,0x04,0x16,0x7c
-# W32: v_cmp_ngt_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x16,0x7c]
-# W64: v_cmp_ngt_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x16,0x7c]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, exec_lo, v2.l ; encoding: [0x7e,0x04,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, exec_lo, v2.l    ; encoding: [0x7e,0x04,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x16,0x7c]
 
 0x7f,0x04,0x16,0x7c
-# W32: v_cmp_ngt_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x16,0x7c]
-# W64: v_cmp_ngt_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x16,0x7c]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, exec_hi, v2.l ; encoding: [0x7f,0x04,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, exec_hi, v2.l    ; encoding: [0x7f,0x04,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x16,0x7c]
 
 0x7c,0x04,0x16,0x7c
-# W32: v_cmp_ngt_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x16,0x7c]
-# W64: v_cmp_ngt_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x16,0x7c]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, null, v2.l    ; encoding: [0x7c,0x04,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, null, v2.l       ; encoding: [0x7c,0x04,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x16,0x7c]
 
 0xc1,0x04,0x16,0x7c
-# W32: v_cmp_ngt_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x16,0x7c]
-# W64: v_cmp_ngt_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x16,0x7c]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, -1, v2.l      ; encoding: [0xc1,0x04,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, -1, v2.l         ; encoding: [0xc1,0x04,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x16,0x7c]
 
 0xf0,0x04,0x16,0x7c
-# W32: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x16,0x7c]
-# W64: v_cmp_ngt_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x16,0x7c]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v2.l     ; encoding: [0xf0,0x04,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, 0.5, v2.l        ; encoding: [0xf0,0x04,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x16,0x7c]
 
 0xfd,0x04,0x16,0x7c
-# W32: v_cmp_ngt_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x16,0x7c]
-# W64: v_cmp_ngt_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x16,0x7c]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, src_scc, v2.l ; encoding: [0xfd,0x04,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, src_scc, v2.l    ; encoding: [0xfd,0x04,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x16,0x7c]
 
 0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_ngt_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_ngt_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, 0xfe0b, v127.l   ; encoding: [0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x16,0x7c
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, v1.h, v2.l    ; encoding: [0x81,0x05,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, v1.h, v2.l       ; encoding: [0x81,0x05,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x16,0x7c]
+
+0xff,0x05,0x16,0x7c
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, v127.h, v2.l  ; encoding: [0xff,0x05,0x16,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, v127.h, v2.l     ; encoding: [0xff,0x05,0x16,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x16,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x16,0x7c]
+
+0xfd,0x04,0x17,0x7c
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, src_scc, v2.h ; encoding: [0xfd,0x04,0x17,0x7c]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, src_scc, v2.h    ; encoding: [0xfd,0x04,0x17,0x7c]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x17,0x7c]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x17,0x7c]
+
+0xff,0xfe,0x17,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_ngt_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x17,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_ngt_f16_e32 vcc, 0xfe0b, v127.h   ; encoding: [0xff,0xfe,0x17,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_ngt_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x17,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_ngt_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x17,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0x36,0x7c
 # W32: v_cmp_ngt_f32_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x36,0x7c]
@@ -4403,64 +4889,118 @@
 # W64: v_cmp_ngt_f64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x57,0x7c,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x18,0x7c
-# W32: v_cmp_nle_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x18,0x7c]
-# W64: v_cmp_nle_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x18,0x7c]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, v1.l, v2.l    ; encoding: [0x01,0x05,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, v1.l, v2.l       ; encoding: [0x01,0x05,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x18,0x7c]
 
 0x7f,0x05,0x18,0x7c
-# W32: v_cmp_nle_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x18,0x7c]
-# W64: v_cmp_nle_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x18,0x7c]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, v127.l, v2.l  ; encoding: [0x7f,0x05,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, v127.l, v2.l     ; encoding: [0x7f,0x05,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x18,0x7c]
 
 0x01,0x04,0x18,0x7c
-# W32: v_cmp_nle_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x18,0x7c]
-# W64: v_cmp_nle_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x18,0x7c]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, s1, v2.l      ; encoding: [0x01,0x04,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, s1, v2.l         ; encoding: [0x01,0x04,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x18,0x7c]
 
 0x69,0x04,0x18,0x7c
-# W32: v_cmp_nle_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x18,0x7c]
-# W64: v_cmp_nle_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x18,0x7c]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, s105, v2.l    ; encoding: [0x69,0x04,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, s105, v2.l       ; encoding: [0x69,0x04,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x18,0x7c]
 
 0x6a,0x04,0x18,0x7c
-# W32: v_cmp_nle_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x18,0x7c]
-# W64: v_cmp_nle_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x18,0x7c]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, vcc_lo, v2.l  ; encoding: [0x6a,0x04,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, vcc_lo, v2.l     ; encoding: [0x6a,0x04,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x18,0x7c]
 
 0x6b,0x04,0x18,0x7c
-# W32: v_cmp_nle_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x18,0x7c]
-# W64: v_cmp_nle_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x18,0x7c]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, vcc_hi, v2.l  ; encoding: [0x6b,0x04,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, vcc_hi, v2.l     ; encoding: [0x6b,0x04,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x18,0x7c]
 
 0x7b,0x04,0x18,0x7c
-# W32: v_cmp_nle_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x18,0x7c]
-# W64: v_cmp_nle_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x18,0x7c]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, ttmp15, v2.l  ; encoding: [0x7b,0x04,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, ttmp15, v2.l     ; encoding: [0x7b,0x04,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x18,0x7c]
 
 0x7d,0x04,0x18,0x7c
-# W32: v_cmp_nle_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x18,0x7c]
-# W64: v_cmp_nle_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x18,0x7c]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, m0, v2.l      ; encoding: [0x7d,0x04,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, m0, v2.l         ; encoding: [0x7d,0x04,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x18,0x7c]
 
 0x7e,0x04,0x18,0x7c
-# W32: v_cmp_nle_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x18,0x7c]
-# W64: v_cmp_nle_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x18,0x7c]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, exec_lo, v2.l ; encoding: [0x7e,0x04,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, exec_lo, v2.l    ; encoding: [0x7e,0x04,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x18,0x7c]
 
 0x7f,0x04,0x18,0x7c
-# W32: v_cmp_nle_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x18,0x7c]
-# W64: v_cmp_nle_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x18,0x7c]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, exec_hi, v2.l ; encoding: [0x7f,0x04,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, exec_hi, v2.l    ; encoding: [0x7f,0x04,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x18,0x7c]
 
 0x7c,0x04,0x18,0x7c
-# W32: v_cmp_nle_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x18,0x7c]
-# W64: v_cmp_nle_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x18,0x7c]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, null, v2.l    ; encoding: [0x7c,0x04,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, null, v2.l       ; encoding: [0x7c,0x04,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x18,0x7c]
 
 0xc1,0x04,0x18,0x7c
-# W32: v_cmp_nle_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x18,0x7c]
-# W64: v_cmp_nle_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x18,0x7c]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, -1, v2.l      ; encoding: [0xc1,0x04,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, -1, v2.l         ; encoding: [0xc1,0x04,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x18,0x7c]
 
 0xf0,0x04,0x18,0x7c
-# W32: v_cmp_nle_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x18,0x7c]
-# W64: v_cmp_nle_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x18,0x7c]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, 0.5, v2.l     ; encoding: [0xf0,0x04,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, 0.5, v2.l        ; encoding: [0xf0,0x04,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x18,0x7c]
 
 0xfd,0x04,0x18,0x7c
-# W32: v_cmp_nle_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x18,0x7c]
-# W64: v_cmp_nle_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x18,0x7c]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, src_scc, v2.l ; encoding: [0xfd,0x04,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, src_scc, v2.l    ; encoding: [0xfd,0x04,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x18,0x7c]
 
 0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_nle_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_nle_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, 0xfe0b, v127.l   ; encoding: [0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x18,0x7c
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, v1.h, v2.l    ; encoding: [0x81,0x05,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, v1.h, v2.l       ; encoding: [0x81,0x05,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x18,0x7c]
+
+0xff,0x05,0x18,0x7c
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, v127.h, v2.l  ; encoding: [0xff,0x05,0x18,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, v127.h, v2.l     ; encoding: [0xff,0x05,0x18,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x18,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x18,0x7c]
+
+0xfd,0x04,0x19,0x7c
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, src_scc, v2.h ; encoding: [0xfd,0x04,0x19,0x7c]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, src_scc, v2.h    ; encoding: [0xfd,0x04,0x19,0x7c]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x19,0x7c]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x19,0x7c]
+
+0xff,0xfe,0x19,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_nle_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x19,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_nle_f16_e32 vcc, 0xfe0b, v127.h   ; encoding: [0xff,0xfe,0x19,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_nle_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x19,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_nle_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x19,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0x38,0x7c
 # W32: v_cmp_nle_f32_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x38,0x7c]
@@ -4571,64 +5111,118 @@
 # W64: v_cmp_nle_f64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x59,0x7c,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x14,0x7c
-# W32: v_cmp_nlg_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x14,0x7c]
-# W64: v_cmp_nlg_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x14,0x7c]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, v1.l, v2.l    ; encoding: [0x01,0x05,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, v1.l, v2.l       ; encoding: [0x01,0x05,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x14,0x7c]
 
 0x7f,0x05,0x14,0x7c
-# W32: v_cmp_nlg_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x14,0x7c]
-# W64: v_cmp_nlg_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x14,0x7c]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, v127.l, v2.l  ; encoding: [0x7f,0x05,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, v127.l, v2.l     ; encoding: [0x7f,0x05,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x14,0x7c]
 
 0x01,0x04,0x14,0x7c
-# W32: v_cmp_nlg_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x14,0x7c]
-# W64: v_cmp_nlg_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x14,0x7c]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, s1, v2.l      ; encoding: [0x01,0x04,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, s1, v2.l         ; encoding: [0x01,0x04,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x14,0x7c]
 
 0x69,0x04,0x14,0x7c
-# W32: v_cmp_nlg_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x14,0x7c]
-# W64: v_cmp_nlg_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x14,0x7c]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, s105, v2.l    ; encoding: [0x69,0x04,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, s105, v2.l       ; encoding: [0x69,0x04,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x14,0x7c]
 
 0x6a,0x04,0x14,0x7c
-# W32: v_cmp_nlg_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x14,0x7c]
-# W64: v_cmp_nlg_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x14,0x7c]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, vcc_lo, v2.l  ; encoding: [0x6a,0x04,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, vcc_lo, v2.l     ; encoding: [0x6a,0x04,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x14,0x7c]
 
 0x6b,0x04,0x14,0x7c
-# W32: v_cmp_nlg_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x14,0x7c]
-# W64: v_cmp_nlg_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x14,0x7c]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, vcc_hi, v2.l  ; encoding: [0x6b,0x04,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, vcc_hi, v2.l     ; encoding: [0x6b,0x04,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x14,0x7c]
 
 0x7b,0x04,0x14,0x7c
-# W32: v_cmp_nlg_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x14,0x7c]
-# W64: v_cmp_nlg_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x14,0x7c]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, ttmp15, v2.l  ; encoding: [0x7b,0x04,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, ttmp15, v2.l     ; encoding: [0x7b,0x04,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x14,0x7c]
 
 0x7d,0x04,0x14,0x7c
-# W32: v_cmp_nlg_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x14,0x7c]
-# W64: v_cmp_nlg_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x14,0x7c]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, m0, v2.l      ; encoding: [0x7d,0x04,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, m0, v2.l         ; encoding: [0x7d,0x04,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x14,0x7c]
 
 0x7e,0x04,0x14,0x7c
-# W32: v_cmp_nlg_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x14,0x7c]
-# W64: v_cmp_nlg_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x14,0x7c]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, exec_lo, v2.l ; encoding: [0x7e,0x04,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, exec_lo, v2.l    ; encoding: [0x7e,0x04,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x14,0x7c]
 
 0x7f,0x04,0x14,0x7c
-# W32: v_cmp_nlg_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x14,0x7c]
-# W64: v_cmp_nlg_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x14,0x7c]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, exec_hi, v2.l ; encoding: [0x7f,0x04,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, exec_hi, v2.l    ; encoding: [0x7f,0x04,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x14,0x7c]
 
 0x7c,0x04,0x14,0x7c
-# W32: v_cmp_nlg_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x14,0x7c]
-# W64: v_cmp_nlg_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x14,0x7c]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, null, v2.l    ; encoding: [0x7c,0x04,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, null, v2.l       ; encoding: [0x7c,0x04,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x14,0x7c]
 
 0xc1,0x04,0x14,0x7c
-# W32: v_cmp_nlg_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x14,0x7c]
-# W64: v_cmp_nlg_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x14,0x7c]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, -1, v2.l      ; encoding: [0xc1,0x04,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, -1, v2.l         ; encoding: [0xc1,0x04,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x14,0x7c]
 
 0xf0,0x04,0x14,0x7c
-# W32: v_cmp_nlg_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x14,0x7c]
-# W64: v_cmp_nlg_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x14,0x7c]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, 0.5, v2.l     ; encoding: [0xf0,0x04,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, 0.5, v2.l        ; encoding: [0xf0,0x04,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x14,0x7c]
 
 0xfd,0x04,0x14,0x7c
-# W32: v_cmp_nlg_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x14,0x7c]
-# W64: v_cmp_nlg_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x14,0x7c]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, src_scc, v2.l ; encoding: [0xfd,0x04,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, src_scc, v2.l    ; encoding: [0xfd,0x04,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x14,0x7c]
 
 0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_nlg_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_nlg_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, 0xfe0b, v127.l   ; encoding: [0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x14,0x7c
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, v1.h, v2.l    ; encoding: [0x81,0x05,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, v1.h, v2.l       ; encoding: [0x81,0x05,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x14,0x7c]
+
+0xff,0x05,0x14,0x7c
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, v127.h, v2.l  ; encoding: [0xff,0x05,0x14,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, v127.h, v2.l     ; encoding: [0xff,0x05,0x14,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x14,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x14,0x7c]
+
+0xfd,0x04,0x15,0x7c
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, src_scc, v2.h ; encoding: [0xfd,0x04,0x15,0x7c]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, src_scc, v2.h    ; encoding: [0xfd,0x04,0x15,0x7c]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x15,0x7c]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x15,0x7c]
+
+0xff,0xfe,0x15,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_nlg_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x15,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_nlg_f16_e32 vcc, 0xfe0b, v127.h   ; encoding: [0xff,0xfe,0x15,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_nlg_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x15,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_nlg_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x15,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0x34,0x7c
 # W32: v_cmp_nlg_f32_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x34,0x7c]
@@ -4739,64 +5333,118 @@
 # W64: v_cmp_nlg_f64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x55,0x7c,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x1c,0x7c
-# W32: v_cmp_nlt_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x1c,0x7c]
-# W64: v_cmp_nlt_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x1c,0x7c]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, v1.l, v2.l    ; encoding: [0x01,0x05,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, v1.l, v2.l       ; encoding: [0x01,0x05,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0x1c,0x7c]
 
 0x7f,0x05,0x1c,0x7c
-# W32: v_cmp_nlt_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x1c,0x7c]
-# W64: v_cmp_nlt_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x1c,0x7c]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, v127.l, v2.l  ; encoding: [0x7f,0x05,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, v127.l, v2.l     ; encoding: [0x7f,0x05,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, v127, v2      ; encoding: [0x7f,0x05,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, v127, v2         ; encoding: [0x7f,0x05,0x1c,0x7c]
 
 0x01,0x04,0x1c,0x7c
-# W32: v_cmp_nlt_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x1c,0x7c]
-# W64: v_cmp_nlt_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x1c,0x7c]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, s1, v2.l      ; encoding: [0x01,0x04,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, s1, v2.l         ; encoding: [0x01,0x04,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, s1, v2        ; encoding: [0x01,0x04,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, s1, v2           ; encoding: [0x01,0x04,0x1c,0x7c]
 
 0x69,0x04,0x1c,0x7c
-# W32: v_cmp_nlt_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x1c,0x7c]
-# W64: v_cmp_nlt_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x1c,0x7c]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, s105, v2.l    ; encoding: [0x69,0x04,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, s105, v2.l       ; encoding: [0x69,0x04,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, s105, v2      ; encoding: [0x69,0x04,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, s105, v2         ; encoding: [0x69,0x04,0x1c,0x7c]
 
 0x6a,0x04,0x1c,0x7c
-# W32: v_cmp_nlt_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x1c,0x7c]
-# W64: v_cmp_nlt_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x1c,0x7c]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, vcc_lo, v2.l  ; encoding: [0x6a,0x04,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, vcc_lo, v2.l     ; encoding: [0x6a,0x04,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, vcc_lo, v2    ; encoding: [0x6a,0x04,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, vcc_lo, v2       ; encoding: [0x6a,0x04,0x1c,0x7c]
 
 0x6b,0x04,0x1c,0x7c
-# W32: v_cmp_nlt_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x1c,0x7c]
-# W64: v_cmp_nlt_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x1c,0x7c]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, vcc_hi, v2.l  ; encoding: [0x6b,0x04,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, vcc_hi, v2.l     ; encoding: [0x6b,0x04,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, vcc_hi, v2    ; encoding: [0x6b,0x04,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, vcc_hi, v2       ; encoding: [0x6b,0x04,0x1c,0x7c]
 
 0x7b,0x04,0x1c,0x7c
-# W32: v_cmp_nlt_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x1c,0x7c]
-# W64: v_cmp_nlt_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x1c,0x7c]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, ttmp15, v2.l  ; encoding: [0x7b,0x04,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, ttmp15, v2.l     ; encoding: [0x7b,0x04,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, ttmp15, v2    ; encoding: [0x7b,0x04,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, ttmp15, v2       ; encoding: [0x7b,0x04,0x1c,0x7c]
 
 0x7d,0x04,0x1c,0x7c
-# W32: v_cmp_nlt_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x1c,0x7c]
-# W64: v_cmp_nlt_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x1c,0x7c]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, m0, v2.l      ; encoding: [0x7d,0x04,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, m0, v2.l         ; encoding: [0x7d,0x04,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, m0, v2        ; encoding: [0x7d,0x04,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, m0, v2           ; encoding: [0x7d,0x04,0x1c,0x7c]
 
 0x7e,0x04,0x1c,0x7c
-# W32: v_cmp_nlt_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x1c,0x7c]
-# W64: v_cmp_nlt_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x1c,0x7c]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, exec_lo, v2.l ; encoding: [0x7e,0x04,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, exec_lo, v2.l    ; encoding: [0x7e,0x04,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, exec_lo, v2   ; encoding: [0x7e,0x04,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, exec_lo, v2      ; encoding: [0x7e,0x04,0x1c,0x7c]
 
 0x7f,0x04,0x1c,0x7c
-# W32: v_cmp_nlt_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x1c,0x7c]
-# W64: v_cmp_nlt_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x1c,0x7c]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, exec_hi, v2.l ; encoding: [0x7f,0x04,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, exec_hi, v2.l    ; encoding: [0x7f,0x04,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, exec_hi, v2   ; encoding: [0x7f,0x04,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, exec_hi, v2      ; encoding: [0x7f,0x04,0x1c,0x7c]
 
 0x7c,0x04,0x1c,0x7c
-# W32: v_cmp_nlt_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x1c,0x7c]
-# W64: v_cmp_nlt_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x1c,0x7c]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, null, v2.l    ; encoding: [0x7c,0x04,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, null, v2.l       ; encoding: [0x7c,0x04,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, null, v2      ; encoding: [0x7c,0x04,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, null, v2         ; encoding: [0x7c,0x04,0x1c,0x7c]
 
 0xc1,0x04,0x1c,0x7c
-# W32: v_cmp_nlt_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x1c,0x7c]
-# W64: v_cmp_nlt_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x1c,0x7c]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, -1, v2.l      ; encoding: [0xc1,0x04,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, -1, v2.l         ; encoding: [0xc1,0x04,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, -1, v2        ; encoding: [0xc1,0x04,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, -1, v2           ; encoding: [0xc1,0x04,0x1c,0x7c]
 
 0xf0,0x04,0x1c,0x7c
-# W32: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x1c,0x7c]
-# W64: v_cmp_nlt_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x1c,0x7c]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v2.l     ; encoding: [0xf0,0x04,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, 0.5, v2.l        ; encoding: [0xf0,0x04,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v2       ; encoding: [0xf0,0x04,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, 0.5, v2          ; encoding: [0xf0,0x04,0x1c,0x7c]
 
 0xfd,0x04,0x1c,0x7c
-# W32: v_cmp_nlt_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x1c,0x7c]
-# W64: v_cmp_nlt_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x1c,0x7c]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, src_scc, v2.l ; encoding: [0xfd,0x04,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, src_scc, v2.l    ; encoding: [0xfd,0x04,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, src_scc, v2   ; encoding: [0xfd,0x04,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, src_scc, v2      ; encoding: [0xfd,0x04,0x1c,0x7c]
 
 0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_nlt_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_nlt_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, 0xfe0b, v127.l   ; encoding: [0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, 0xfe0b, v127  ; encoding: [0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, 0xfe0b, v127     ; encoding: [0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x1c,0x7c
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, v1.h, v2.l    ; encoding: [0x81,0x05,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, v1.h, v2.l       ; encoding: [0x81,0x05,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x1c,0x7c]
+
+0xff,0x05,0x1c,0x7c
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, v127.h, v2.l  ; encoding: [0xff,0x05,0x1c,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, v127.h, v2.l     ; encoding: [0xff,0x05,0x1c,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x1c,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x1c,0x7c]
+
+0xfd,0x04,0x1d,0x7c
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, src_scc, v2.h ; encoding: [0xfd,0x04,0x1d,0x7c]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, src_scc, v2.h    ; encoding: [0xfd,0x04,0x1d,0x7c]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x1d,0x7c]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x1d,0x7c]
+
+0xff,0xfe,0x1d,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_nlt_f16_e32 vcc_lo, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x1d,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_nlt_f16_e32 vcc, 0xfe0b, v127.h   ; encoding: [0xff,0xfe,0x1d,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_nlt_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x1d,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_nlt_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x1d,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0x3c,0x7c
 # W32: v_cmp_nlt_f32_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0x3c,0x7c]
@@ -4907,64 +5555,118 @@
 # W64: v_cmp_nlt_f64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x5d,0x7c,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x0e,0x7c
-# W32: v_cmp_o_f16_e32 vcc_lo, v1, v2          ; encoding: [0x01,0x05,0x0e,0x7c]
-# W64: v_cmp_o_f16_e32 vcc, v1, v2             ; encoding: [0x01,0x05,0x0e,0x7c]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, v1.l, v2.l      ; encoding: [0x01,0x05,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, v1.l, v2.l         ; encoding: [0x01,0x05,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, v1, v2          ; encoding: [0x01,0x05,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, v1, v2             ; encoding: [0x01,0x05,0x0e,0x7c]
 
 0x7f,0x05,0x0e,0x7c
-# W32: v_cmp_o_f16_e32 vcc_lo, v127, v2        ; encoding: [0x7f,0x05,0x0e,0x7c]
-# W64: v_cmp_o_f16_e32 vcc, v127, v2           ; encoding: [0x7f,0x05,0x0e,0x7c]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, v127.l, v2.l    ; encoding: [0x7f,0x05,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, v127.l, v2.l       ; encoding: [0x7f,0x05,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, v127, v2        ; encoding: [0x7f,0x05,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, v127, v2           ; encoding: [0x7f,0x05,0x0e,0x7c]
 
 0x01,0x04,0x0e,0x7c
-# W32: v_cmp_o_f16_e32 vcc_lo, s1, v2          ; encoding: [0x01,0x04,0x0e,0x7c]
-# W64: v_cmp_o_f16_e32 vcc, s1, v2             ; encoding: [0x01,0x04,0x0e,0x7c]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, s1, v2.l        ; encoding: [0x01,0x04,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, s1, v2.l           ; encoding: [0x01,0x04,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, s1, v2          ; encoding: [0x01,0x04,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, s1, v2             ; encoding: [0x01,0x04,0x0e,0x7c]
 
 0x69,0x04,0x0e,0x7c
-# W32: v_cmp_o_f16_e32 vcc_lo, s105, v2        ; encoding: [0x69,0x04,0x0e,0x7c]
-# W64: v_cmp_o_f16_e32 vcc, s105, v2           ; encoding: [0x69,0x04,0x0e,0x7c]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, s105, v2.l      ; encoding: [0x69,0x04,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, s105, v2.l         ; encoding: [0x69,0x04,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, s105, v2        ; encoding: [0x69,0x04,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, s105, v2           ; encoding: [0x69,0x04,0x0e,0x7c]
 
 0x6a,0x04,0x0e,0x7c
-# W32: v_cmp_o_f16_e32 vcc_lo, vcc_lo, v2      ; encoding: [0x6a,0x04,0x0e,0x7c]
-# W64: v_cmp_o_f16_e32 vcc, vcc_lo, v2         ; encoding: [0x6a,0x04,0x0e,0x7c]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, vcc_lo, v2.l    ; encoding: [0x6a,0x04,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, vcc_lo, v2.l       ; encoding: [0x6a,0x04,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, vcc_lo, v2      ; encoding: [0x6a,0x04,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, vcc_lo, v2         ; encoding: [0x6a,0x04,0x0e,0x7c]
 
 0x6b,0x04,0x0e,0x7c
-# W32: v_cmp_o_f16_e32 vcc_lo, vcc_hi, v2      ; encoding: [0x6b,0x04,0x0e,0x7c]
-# W64: v_cmp_o_f16_e32 vcc, vcc_hi, v2         ; encoding: [0x6b,0x04,0x0e,0x7c]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, vcc_hi, v2.l    ; encoding: [0x6b,0x04,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, vcc_hi, v2.l       ; encoding: [0x6b,0x04,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, vcc_hi, v2      ; encoding: [0x6b,0x04,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, vcc_hi, v2         ; encoding: [0x6b,0x04,0x0e,0x7c]
 
 0x7b,0x04,0x0e,0x7c
-# W32: v_cmp_o_f16_e32 vcc_lo, ttmp15, v2      ; encoding: [0x7b,0x04,0x0e,0x7c]
-# W64: v_cmp_o_f16_e32 vcc, ttmp15, v2         ; encoding: [0x7b,0x04,0x0e,0x7c]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, ttmp15, v2.l    ; encoding: [0x7b,0x04,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, ttmp15, v2.l       ; encoding: [0x7b,0x04,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, ttmp15, v2      ; encoding: [0x7b,0x04,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, ttmp15, v2         ; encoding: [0x7b,0x04,0x0e,0x7c]
 
 0x7d,0x04,0x0e,0x7c
-# W32: v_cmp_o_f16_e32 vcc_lo, m0, v2          ; encoding: [0x7d,0x04,0x0e,0x7c]
-# W64: v_cmp_o_f16_e32 vcc, m0, v2             ; encoding: [0x7d,0x04,0x0e,0x7c]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, m0, v2.l        ; encoding: [0x7d,0x04,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, m0, v2.l           ; encoding: [0x7d,0x04,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, m0, v2          ; encoding: [0x7d,0x04,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, m0, v2             ; encoding: [0x7d,0x04,0x0e,0x7c]
 
 0x7e,0x04,0x0e,0x7c
-# W32: v_cmp_o_f16_e32 vcc_lo, exec_lo, v2     ; encoding: [0x7e,0x04,0x0e,0x7c]
-# W64: v_cmp_o_f16_e32 vcc, exec_lo, v2        ; encoding: [0x7e,0x04,0x0e,0x7c]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, exec_lo, v2.l   ; encoding: [0x7e,0x04,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, exec_lo, v2.l      ; encoding: [0x7e,0x04,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, exec_lo, v2     ; encoding: [0x7e,0x04,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, exec_lo, v2        ; encoding: [0x7e,0x04,0x0e,0x7c]
 
 0x7f,0x04,0x0e,0x7c
-# W32: v_cmp_o_f16_e32 vcc_lo, exec_hi, v2     ; encoding: [0x7f,0x04,0x0e,0x7c]
-# W64: v_cmp_o_f16_e32 vcc, exec_hi, v2        ; encoding: [0x7f,0x04,0x0e,0x7c]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, exec_hi, v2.l   ; encoding: [0x7f,0x04,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, exec_hi, v2.l      ; encoding: [0x7f,0x04,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, exec_hi, v2     ; encoding: [0x7f,0x04,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, exec_hi, v2        ; encoding: [0x7f,0x04,0x0e,0x7c]
 
 0x7c,0x04,0x0e,0x7c
-# W32: v_cmp_o_f16_e32 vcc_lo, null, v2        ; encoding: [0x7c,0x04,0x0e,0x7c]
-# W64: v_cmp_o_f16_e32 vcc, null, v2           ; encoding: [0x7c,0x04,0x0e,0x7c]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, null, v2.l      ; encoding: [0x7c,0x04,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, null, v2.l         ; encoding: [0x7c,0x04,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, null, v2        ; encoding: [0x7c,0x04,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, null, v2           ; encoding: [0x7c,0x04,0x0e,0x7c]
 
 0xc1,0x04,0x0e,0x7c
-# W32: v_cmp_o_f16_e32 vcc_lo, -1, v2          ; encoding: [0xc1,0x04,0x0e,0x7c]
-# W64: v_cmp_o_f16_e32 vcc, -1, v2             ; encoding: [0xc1,0x04,0x0e,0x7c]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, -1, v2.l        ; encoding: [0xc1,0x04,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, -1, v2.l           ; encoding: [0xc1,0x04,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, -1, v2          ; encoding: [0xc1,0x04,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, -1, v2             ; encoding: [0xc1,0x04,0x0e,0x7c]
 
 0xf0,0x04,0x0e,0x7c
-# W32: v_cmp_o_f16_e32 vcc_lo, 0.5, v2         ; encoding: [0xf0,0x04,0x0e,0x7c]
-# W64: v_cmp_o_f16_e32 vcc, 0.5, v2            ; encoding: [0xf0,0x04,0x0e,0x7c]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, 0.5, v2.l       ; encoding: [0xf0,0x04,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, 0.5, v2.l          ; encoding: [0xf0,0x04,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, 0.5, v2         ; encoding: [0xf0,0x04,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, 0.5, v2            ; encoding: [0xf0,0x04,0x0e,0x7c]
 
 0xfd,0x04,0x0e,0x7c
-# W32: v_cmp_o_f16_e32 vcc_lo, src_scc, v2     ; encoding: [0xfd,0x04,0x0e,0x7c]
-# W64: v_cmp_o_f16_e32 vcc, src_scc, v2        ; encoding: [0xfd,0x04,0x0e,0x7c]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, src_scc, v2.l   ; encoding: [0xfd,0x04,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, src_scc, v2.l      ; encoding: [0xfd,0x04,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, src_scc, v2     ; encoding: [0xfd,0x04,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, src_scc, v2        ; encoding: [0xfd,0x04,0x0e,0x7c]
 
 0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_o_f16_e32 vcc_lo, 0xfe0b, v127    ; encoding: [0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_o_f16_e32 vcc, 0xfe0b, v127       ; encoding: [0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, 0xfe0b, v127.l  ; encoding: [0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, 0xfe0b, v127.l     ; encoding: [0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, 0xfe0b, v127    ; encoding: [0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, 0xfe0b, v127       ; encoding: [0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x0e,0x7c
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, v1.h, v2.l      ; encoding: [0x81,0x05,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, v1.h, v2.l         ; encoding: [0x81,0x05,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x0e,0x7c]
+
+0xff,0x05,0x0e,0x7c
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, v127.h, v2.l    ; encoding: [0xff,0x05,0x0e,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, v127.h, v2.l       ; encoding: [0xff,0x05,0x0e,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x0e,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x0e,0x7c]
+
+0xfd,0x04,0x0f,0x7c
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, src_scc, v2.h   ; encoding: [0xfd,0x04,0x0f,0x7c]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, src_scc, v2.h      ; encoding: [0xfd,0x04,0x0f,0x7c]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x0f,0x7c]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x0f,0x7c]
+
+0xff,0xfe,0x0f,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_o_f16_e32 vcc_lo, 0xfe0b, v127.h  ; encoding: [0xff,0xfe,0x0f,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_o_f16_e32 vcc, 0xfe0b, v127.h     ; encoding: [0xff,0xfe,0x0f,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_o_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x0f,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_o_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x0f,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0x2e,0x7c
 # W32: v_cmp_o_f32_e32 vcc_lo, v1, v2          ; encoding: [0x01,0x05,0x2e,0x7c]
@@ -5075,64 +5777,118 @@
 # W64: v_cmp_o_f64_e32 vcc, 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x4f,0x7c,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x10,0x7c
-# W32: v_cmp_u_f16_e32 vcc_lo, v1, v2          ; encoding: [0x01,0x05,0x10,0x7c]
-# W64: v_cmp_u_f16_e32 vcc, v1, v2             ; encoding: [0x01,0x05,0x10,0x7c]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, v1.l, v2.l      ; encoding: [0x01,0x05,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, v1.l, v2.l         ; encoding: [0x01,0x05,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, v1, v2          ; encoding: [0x01,0x05,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, v1, v2             ; encoding: [0x01,0x05,0x10,0x7c]
 
 0x7f,0x05,0x10,0x7c
-# W32: v_cmp_u_f16_e32 vcc_lo, v127, v2        ; encoding: [0x7f,0x05,0x10,0x7c]
-# W64: v_cmp_u_f16_e32 vcc, v127, v2           ; encoding: [0x7f,0x05,0x10,0x7c]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, v127.l, v2.l    ; encoding: [0x7f,0x05,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, v127.l, v2.l       ; encoding: [0x7f,0x05,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, v127, v2        ; encoding: [0x7f,0x05,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, v127, v2           ; encoding: [0x7f,0x05,0x10,0x7c]
 
 0x01,0x04,0x10,0x7c
-# W32: v_cmp_u_f16_e32 vcc_lo, s1, v2          ; encoding: [0x01,0x04,0x10,0x7c]
-# W64: v_cmp_u_f16_e32 vcc, s1, v2             ; encoding: [0x01,0x04,0x10,0x7c]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, s1, v2.l        ; encoding: [0x01,0x04,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, s1, v2.l           ; encoding: [0x01,0x04,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, s1, v2          ; encoding: [0x01,0x04,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, s1, v2             ; encoding: [0x01,0x04,0x10,0x7c]
 
 0x69,0x04,0x10,0x7c
-# W32: v_cmp_u_f16_e32 vcc_lo, s105, v2        ; encoding: [0x69,0x04,0x10,0x7c]
-# W64: v_cmp_u_f16_e32 vcc, s105, v2           ; encoding: [0x69,0x04,0x10,0x7c]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, s105, v2.l      ; encoding: [0x69,0x04,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, s105, v2.l         ; encoding: [0x69,0x04,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, s105, v2        ; encoding: [0x69,0x04,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, s105, v2           ; encoding: [0x69,0x04,0x10,0x7c]
 
 0x6a,0x04,0x10,0x7c
-# W32: v_cmp_u_f16_e32 vcc_lo, vcc_lo, v2      ; encoding: [0x6a,0x04,0x10,0x7c]
-# W64: v_cmp_u_f16_e32 vcc, vcc_lo, v2         ; encoding: [0x6a,0x04,0x10,0x7c]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, vcc_lo, v2.l    ; encoding: [0x6a,0x04,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, vcc_lo, v2.l       ; encoding: [0x6a,0x04,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, vcc_lo, v2      ; encoding: [0x6a,0x04,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, vcc_lo, v2         ; encoding: [0x6a,0x04,0x10,0x7c]
 
 0x6b,0x04,0x10,0x7c
-# W32: v_cmp_u_f16_e32 vcc_lo, vcc_hi, v2      ; encoding: [0x6b,0x04,0x10,0x7c]
-# W64: v_cmp_u_f16_e32 vcc, vcc_hi, v2         ; encoding: [0x6b,0x04,0x10,0x7c]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, vcc_hi, v2.l    ; encoding: [0x6b,0x04,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, vcc_hi, v2.l       ; encoding: [0x6b,0x04,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, vcc_hi, v2      ; encoding: [0x6b,0x04,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, vcc_hi, v2         ; encoding: [0x6b,0x04,0x10,0x7c]
 
 0x7b,0x04,0x10,0x7c
-# W32: v_cmp_u_f16_e32 vcc_lo, ttmp15, v2      ; encoding: [0x7b,0x04,0x10,0x7c]
-# W64: v_cmp_u_f16_e32 vcc, ttmp15, v2         ; encoding: [0x7b,0x04,0x10,0x7c]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, ttmp15, v2.l    ; encoding: [0x7b,0x04,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, ttmp15, v2.l       ; encoding: [0x7b,0x04,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, ttmp15, v2      ; encoding: [0x7b,0x04,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, ttmp15, v2         ; encoding: [0x7b,0x04,0x10,0x7c]
 
 0x7d,0x04,0x10,0x7c
-# W32: v_cmp_u_f16_e32 vcc_lo, m0, v2          ; encoding: [0x7d,0x04,0x10,0x7c]
-# W64: v_cmp_u_f16_e32 vcc, m0, v2             ; encoding: [0x7d,0x04,0x10,0x7c]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, m0, v2.l        ; encoding: [0x7d,0x04,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, m0, v2.l           ; encoding: [0x7d,0x04,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, m0, v2          ; encoding: [0x7d,0x04,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, m0, v2             ; encoding: [0x7d,0x04,0x10,0x7c]
 
 0x7e,0x04,0x10,0x7c
-# W32: v_cmp_u_f16_e32 vcc_lo, exec_lo, v2     ; encoding: [0x7e,0x04,0x10,0x7c]
-# W64: v_cmp_u_f16_e32 vcc, exec_lo, v2        ; encoding: [0x7e,0x04,0x10,0x7c]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, exec_lo, v2.l   ; encoding: [0x7e,0x04,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, exec_lo, v2.l      ; encoding: [0x7e,0x04,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, exec_lo, v2     ; encoding: [0x7e,0x04,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, exec_lo, v2        ; encoding: [0x7e,0x04,0x10,0x7c]
 
 0x7f,0x04,0x10,0x7c
-# W32: v_cmp_u_f16_e32 vcc_lo, exec_hi, v2     ; encoding: [0x7f,0x04,0x10,0x7c]
-# W64: v_cmp_u_f16_e32 vcc, exec_hi, v2        ; encoding: [0x7f,0x04,0x10,0x7c]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, exec_hi, v2.l   ; encoding: [0x7f,0x04,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, exec_hi, v2.l      ; encoding: [0x7f,0x04,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, exec_hi, v2     ; encoding: [0x7f,0x04,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, exec_hi, v2        ; encoding: [0x7f,0x04,0x10,0x7c]
 
 0x7c,0x04,0x10,0x7c
-# W32: v_cmp_u_f16_e32 vcc_lo, null, v2        ; encoding: [0x7c,0x04,0x10,0x7c]
-# W64: v_cmp_u_f16_e32 vcc, null, v2           ; encoding: [0x7c,0x04,0x10,0x7c]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, null, v2.l      ; encoding: [0x7c,0x04,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, null, v2.l         ; encoding: [0x7c,0x04,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, null, v2        ; encoding: [0x7c,0x04,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, null, v2           ; encoding: [0x7c,0x04,0x10,0x7c]
 
 0xc1,0x04,0x10,0x7c
-# W32: v_cmp_u_f16_e32 vcc_lo, -1, v2          ; encoding: [0xc1,0x04,0x10,0x7c]
-# W64: v_cmp_u_f16_e32 vcc, -1, v2             ; encoding: [0xc1,0x04,0x10,0x7c]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, -1, v2.l        ; encoding: [0xc1,0x04,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, -1, v2.l           ; encoding: [0xc1,0x04,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, -1, v2          ; encoding: [0xc1,0x04,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, -1, v2             ; encoding: [0xc1,0x04,0x10,0x7c]
 
 0xf0,0x04,0x10,0x7c
-# W32: v_cmp_u_f16_e32 vcc_lo, 0.5, v2         ; encoding: [0xf0,0x04,0x10,0x7c]
-# W64: v_cmp_u_f16_e32 vcc, 0.5, v2            ; encoding: [0xf0,0x04,0x10,0x7c]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, 0.5, v2.l       ; encoding: [0xf0,0x04,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, 0.5, v2.l          ; encoding: [0xf0,0x04,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, 0.5, v2         ; encoding: [0xf0,0x04,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, 0.5, v2            ; encoding: [0xf0,0x04,0x10,0x7c]
 
 0xfd,0x04,0x10,0x7c
-# W32: v_cmp_u_f16_e32 vcc_lo, src_scc, v2     ; encoding: [0xfd,0x04,0x10,0x7c]
-# W64: v_cmp_u_f16_e32 vcc, src_scc, v2        ; encoding: [0xfd,0x04,0x10,0x7c]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, src_scc, v2.l   ; encoding: [0xfd,0x04,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, src_scc, v2.l      ; encoding: [0xfd,0x04,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, src_scc, v2     ; encoding: [0xfd,0x04,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, src_scc, v2        ; encoding: [0xfd,0x04,0x10,0x7c]
 
 0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00
-# W32: v_cmp_u_f16_e32 vcc_lo, 0xfe0b, v127    ; encoding: [0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00]
-# W64: v_cmp_u_f16_e32 vcc, 0xfe0b, v127       ; encoding: [0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, 0xfe0b, v127.l  ; encoding: [0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, 0xfe0b, v127.l     ; encoding: [0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, 0xfe0b, v127    ; encoding: [0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, 0xfe0b, v127       ; encoding: [0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x10,0x7c
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, v1.h, v2.l      ; encoding: [0x81,0x05,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, v1.h, v2.l         ; encoding: [0x81,0x05,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x10,0x7c]
+
+0xff,0x05,0x10,0x7c
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, v127.h, v2.l    ; encoding: [0xff,0x05,0x10,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, v127.h, v2.l       ; encoding: [0xff,0x05,0x10,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x10,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x10,0x7c]
+
+0xfd,0x04,0x11,0x7c
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, src_scc, v2.h   ; encoding: [0xfd,0x04,0x11,0x7c]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, src_scc, v2.h      ; encoding: [0xfd,0x04,0x11,0x7c]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x11,0x7c]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x11,0x7c]
+
+0xff,0xfe,0x11,0x7c,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cmp_u_f16_e32 vcc_lo, 0xfe0b, v127.h  ; encoding: [0xff,0xfe,0x11,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cmp_u_f16_e32 vcc, 0xfe0b, v127.h     ; encoding: [0xff,0xfe,0x11,0x7c,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cmp_u_f16_e32 vcc_lo, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x11,0x7c,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cmp_u_f16_e32 vcc, 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x11,0x7c,0x0b,0xfe,0x00,0x00]
 
 0x01,0x05,0x30,0x7c
 # W32: v_cmp_u_f32_e32 vcc_lo, v1, v2          ; encoding: [0x01,0x05,0x30,0x7c]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc_dpp16.txt
index e3f25d24d9d52..4d534d3111433 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc_dpp16.txt
@@ -5,60 +5,100 @@
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=W64,W64-FAKE16
 
 0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_class_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_class_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_class_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_class_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_class_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_class_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_class_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_class_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_class_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_class_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_class_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_class_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_class_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_class_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_class_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_class_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_class_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_class_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_class_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_class_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_class_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_class_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_class_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_class_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_class_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_class_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_class_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_class_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0xfa,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_class_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_class_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_class_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x3d,0x30
-# W32: v_cmp_class_f16 vcc_lo, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x3d,0x30]
-# W64: v_cmp_class_f16 vcc, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x3d,0x30]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, -|v127.l|, v127.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x3d,0x30]
+# W64-REAL16: v_cmp_class_f16 vcc, -|v127.l|, v127.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x3d,0x30]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x3d,0x30]
+# W64-FAKE16: v_cmp_class_f16 vcc, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x3d,0x30]
+
+0xfa,0x04,0xfb,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0xfb,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0xfb,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0xfb,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_class_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0xfb,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0xfb,0x7c,0xff,0x6f,0x3d,0x30
+# W32-REAL16: v_cmp_class_f16 vcc_lo, -|v127.h|, v127.h row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfb,0x7c,0xff,0x6f,0x3d,0x30]
+# W64-REAL16: v_cmp_class_f16 vcc, -|v127.h|, v127.h row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfb,0x7c,0xff,0x6f,0x3d,0x30]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfb,0x7c,0xff,0x6f,0x3d,0x30]
+# W64-FAKE16: v_cmp_class_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfb,0x7c,0xff,0x6f,0x3d,0x30]
 
 0xfa,0x04,0xfc,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_class_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x1b,0x00,0xff]
@@ -117,60 +157,100 @@
 # W64: v_cmp_class_f32 vcc, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfd,0x7c,0xff,0x6f,0x3d,0x30]
 
 0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_eq_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_eq_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_eq_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_eq_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_eq_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_eq_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_eq_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_eq_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_eq_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_eq_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_eq_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_eq_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_eq_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_eq_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0x04,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_eq_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_eq_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x04,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xfd,0x30
-# W32: v_cmp_eq_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xfd,0x30]
-# W64: v_cmp_eq_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_eq_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_eq_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xfd,0x30]
+
+0xfa,0x04,0x05,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x05,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x05,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x05,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x05,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0x05,0x7c,0xff,0x6f,0xfd,0x30
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x05,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_eq_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x05,0x7c,0xff,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x05,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_eq_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x05,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x24,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_eq_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x24,0x7c,0x01,0x1b,0x00,0xff]
@@ -533,60 +613,100 @@
 # W64: v_cmp_eq_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x95,0x7c,0xff,0x6f,0x0d,0x30]
 
 0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_ge_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_ge_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_ge_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_ge_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_ge_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_ge_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_ge_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_ge_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_ge_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_ge_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_ge_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_ge_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_ge_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_ge_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0x0c,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_ge_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_ge_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xfd,0x30
-# W32: v_cmp_ge_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xfd,0x30]
-# W64: v_cmp_ge_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_ge_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_ge_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xfd,0x30]
+
+0xfa,0x04,0x0d,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0d,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0d,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0d,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0d,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0x0d,0x7c,0xff,0x6f,0xfd,0x30
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0d,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_ge_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0d,0x7c,0xff,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0d,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_ge_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0d,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x2c,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_ge_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x1b,0x00,0xff]
@@ -949,60 +1069,100 @@
 # W64: v_cmp_ge_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x9d,0x7c,0xff,0x6f,0x0d,0x30]
 
 0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_gt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_gt_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_gt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_gt_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_gt_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_gt_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_gt_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_gt_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_gt_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_gt_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_gt_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_gt_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_gt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_gt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0x08,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_gt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_gt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x08,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xfd,0x30
-# W32: v_cmp_gt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xfd,0x30]
-# W64: v_cmp_gt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_gt_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_gt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xfd,0x30]
+
+0xfa,0x04,0x09,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x09,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x09,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x09,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x09,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0x09,0x7c,0xff,0x6f,0xfd,0x30
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x09,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_gt_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x09,0x7c,0xff,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x09,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_gt_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x09,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x28,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_gt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x28,0x7c,0x01,0x1b,0x00,0xff]
@@ -1365,60 +1525,100 @@
 # W64: v_cmp_gt_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x99,0x7c,0xff,0x6f,0x0d,0x30]
 
 0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_le_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_le_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_le_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_le_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_le_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_le_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_le_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_le_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_le_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_le_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_le_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_le_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_le_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_le_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_le_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_le_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_le_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_le_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_le_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_le_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_le_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_le_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_le_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_le_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_le_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_le_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_le_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_le_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0x06,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_le_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_le_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_le_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x06,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xfd,0x30
-# W32: v_cmp_le_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xfd,0x30]
-# W64: v_cmp_le_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_le_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_le_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xfd,0x30]
+
+0xfa,0x04,0x07,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x07,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x07,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x07,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_le_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x07,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0x07,0x7c,0xff,0x6f,0xfd,0x30
+# W32-REAL16: v_cmp_le_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x07,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_le_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x07,0x7c,0xff,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x07,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_le_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x07,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x26,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_le_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x26,0x7c,0x01,0x1b,0x00,0xff]
@@ -1781,60 +1981,100 @@
 # W64: v_cmp_le_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x97,0x7c,0xff,0x6f,0x0d,0x30]
 
 0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_lg_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_lg_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_lg_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_lg_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_lg_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_lg_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_lg_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_lg_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_lg_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_lg_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_lg_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_lg_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_lg_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_lg_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0x0a,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_lg_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_lg_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xfd,0x30
-# W32: v_cmp_lg_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xfd,0x30]
-# W64: v_cmp_lg_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_lg_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_lg_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xfd,0x30]
+
+0xfa,0x04,0x0b,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0b,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0b,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0b,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0b,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0x0b,0x7c,0xff,0x6f,0xfd,0x30
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0b,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_lg_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0b,0x7c,0xff,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0b,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_lg_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0b,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x2a,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_lg_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x1b,0x00,0xff]
@@ -2653,60 +2893,100 @@
 # W64: v_cmp_ne_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x9b,0x7c,0xff,0x6f,0x0d,0x30]
 
 0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_neq_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_neq_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_neq_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_neq_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_neq_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_neq_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_neq_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_neq_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_neq_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_neq_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_neq_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_neq_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_neq_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_neq_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0x1a,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_neq_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_neq_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xfd,0x30
-# W32: v_cmp_neq_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xfd,0x30]
-# W64: v_cmp_neq_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_neq_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_neq_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xfd,0x30]
+
+0xfa,0x04,0x1b,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1b,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1b,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1b,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1b,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0x1b,0x7c,0xff,0x6f,0xfd,0x30
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1b,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_neq_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1b,0x7c,0xff,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1b,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_neq_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1b,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x3a,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_neq_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x1b,0x00,0xff]
@@ -2765,60 +3045,100 @@
 # W64: v_cmp_neq_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x3b,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_nge_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_nge_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_nge_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_nge_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_nge_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_nge_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_nge_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_nge_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_nge_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_nge_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_nge_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_nge_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_nge_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_nge_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0x12,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_nge_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_nge_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x12,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xfd,0x30
-# W32: v_cmp_nge_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xfd,0x30]
-# W64: v_cmp_nge_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_nge_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_nge_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xfd,0x30]
+
+0xfa,0x04,0x13,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x13,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x13,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x13,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x13,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0x13,0x7c,0xff,0x6f,0xfd,0x30
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x13,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_nge_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x13,0x7c,0xff,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x13,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_nge_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x13,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x32,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_nge_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x32,0x7c,0x01,0x1b,0x00,0xff]
@@ -2877,60 +3197,100 @@
 # W64: v_cmp_nge_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x33,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_ngt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_ngt_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_ngt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_ngt_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_ngt_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_ngt_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_ngt_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_ngt_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_ngt_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_ngt_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_ngt_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_ngt_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_ngt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_ngt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0x16,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_ngt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_ngt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x16,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xfd,0x30
-# W32: v_cmp_ngt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xfd,0x30]
-# W64: v_cmp_ngt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_ngt_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xfd,0x30]
+
+0xfa,0x04,0x17,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x17,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x17,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x17,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x17,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0x17,0x7c,0xff,0x6f,0xfd,0x30
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x17,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_ngt_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x17,0x7c,0xff,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x17,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x17,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x36,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_ngt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x36,0x7c,0x01,0x1b,0x00,0xff]
@@ -2989,60 +3349,100 @@
 # W64: v_cmp_ngt_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x37,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_nle_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_nle_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_nle_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_nle_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_nle_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_nle_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_nle_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_nle_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_nle_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_nle_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_nle_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_nle_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_nle_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_nle_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0x18,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_nle_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_nle_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x18,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xfd,0x30
-# W32: v_cmp_nle_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xfd,0x30]
-# W64: v_cmp_nle_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_nle_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_nle_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xfd,0x30]
+
+0xfa,0x04,0x19,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x19,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x19,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x19,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x19,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0x19,0x7c,0xff,0x6f,0xfd,0x30
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x19,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_nle_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x19,0x7c,0xff,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x19,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_nle_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x19,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x38,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_nle_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x38,0x7c,0x01,0x1b,0x00,0xff]
@@ -3101,60 +3501,100 @@
 # W64: v_cmp_nle_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x39,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_nlg_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_nlg_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_nlg_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_nlg_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_nlg_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_nlg_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_nlg_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_nlg_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_nlg_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_nlg_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_nlg_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_nlg_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_nlg_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_nlg_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0x14,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_nlg_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_nlg_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x14,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xfd,0x30
-# W32: v_cmp_nlg_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xfd,0x30]
-# W64: v_cmp_nlg_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_nlg_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xfd,0x30]
+
+0xfa,0x04,0x15,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x15,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x15,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x15,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x15,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0x15,0x7c,0xff,0x6f,0xfd,0x30
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x15,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_nlg_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x15,0x7c,0xff,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x15,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x15,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x34,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_nlg_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x34,0x7c,0x01,0x1b,0x00,0xff]
@@ -3213,60 +3653,100 @@
 # W64: v_cmp_nlg_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x35,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_nlt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_nlt_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_nlt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_nlt_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_nlt_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_nlt_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_nlt_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_nlt_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_nlt_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_nlt_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_nlt_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_nlt_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_nlt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_nlt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0x1c,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_nlt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_nlt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xfd,0x30
-# W32: v_cmp_nlt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xfd,0x30]
-# W64: v_cmp_nlt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_nlt_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xfd,0x30]
+
+0xfa,0x04,0x1d,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1d,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1d,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1d,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x1d,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0x1d,0x7c,0xff,0x6f,0xfd,0x30
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1d,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_nlt_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1d,0x7c,0xff,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1d,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x1d,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x3c,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_nlt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x1b,0x00,0xff]
@@ -3325,60 +3805,100 @@
 # W64: v_cmp_nlt_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x3d,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_o_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_o_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_o_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_o_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_o_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_o_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_o_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_o_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_o_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_o_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_o_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_o_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_o_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_o_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_o_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_o_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_o_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_o_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_o_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_o_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_o_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_o_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_o_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_o_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_o_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_o_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_o_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_o_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0x0e,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_o_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_o_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_o_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xfd,0x30
-# W32: v_cmp_o_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xfd,0x30]
-# W64: v_cmp_o_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_o_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_o_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xfd,0x30]
+
+0xfa,0x04,0x0f,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0f,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0f,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0f,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_o_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0f,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0x0f,0x7c,0xff,0x6f,0xfd,0x30
+# W32-REAL16: v_cmp_o_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0f,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_o_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0f,0x7c,0xff,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0f,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_o_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x0f,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x2e,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_o_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x1b,0x00,0xff]
@@ -3437,60 +3957,100 @@
 # W64: v_cmp_o_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x2f,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff
-# W32: v_cmp_u_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff]
-# W64: v_cmp_u_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cmp_u_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff]
 
 0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff
-# W32: v_cmp_u_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff]
-# W64: v_cmp_u_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cmp_u_f16 vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff]
 
 0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff
-# W32: v_cmp_u_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff]
-# W64: v_cmp_u_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16 vcc, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff]
 
 0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff
-# W32: v_cmp_u_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff]
-# W64: v_cmp_u_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16 vcc, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff]
 
 0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff
-# W32: v_cmp_u_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff]
-# W64: v_cmp_u_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16 vcc, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff]
 
 0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff
-# W32: v_cmp_u_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff]
-# W64: v_cmp_u_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16 vcc, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff]
 
 0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff
-# W32: v_cmp_u_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff]
-# W64: v_cmp_u_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16 vcc, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff]
 
 0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff
-# W32: v_cmp_u_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff]
-# W64: v_cmp_u_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16 vcc, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff]
 
 0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff
-# W32: v_cmp_u_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff]
-# W64: v_cmp_u_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16 vcc, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff]
 
 0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff
-# W32: v_cmp_u_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff]
-# W64: v_cmp_u_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16 vcc, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff]
 
 0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff
-# W32: v_cmp_u_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff]
-# W64: v_cmp_u_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cmp_u_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff]
 
 0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01
-# W32: v_cmp_u_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01]
-# W64: v_cmp_u_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cmp_u_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01]
 
 0xfa,0x04,0x10,0x7c,0x01,0x60,0x01,0x13
-# W32: v_cmp_u_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x60,0x01,0x13]
-# W64: v_cmp_u_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_u_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x10,0x7c,0x01,0x60,0x01,0x13]
 
 0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xfd,0x30
-# W32: v_cmp_u_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xfd,0x30]
-# W64: v_cmp_u_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_u_f16 vcc, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_u_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xfd,0x30]
+
+0xfa,0x04,0x11,0x7c,0x81,0x60,0x01,0x13
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x11,0x7c,0x81,0x60,0x01,0x13]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x11,0x7c,0x81,0x60,0x01,0x13]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x11,0x7c,0x81,0x60,0x01,0x13]
+# W64-FAKE16: v_cmp_u_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x11,0x7c,0x81,0x60,0x01,0x13]
+
+0xfa,0xfe,0x11,0x7c,0xff,0x6f,0xfd,0x30
+# W32-REAL16: v_cmp_u_f16 vcc_lo, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x11,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-REAL16: v_cmp_u_f16 vcc, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x11,0x7c,0xff,0x6f,0xfd,0x30]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x11,0x7c,0xff,0x6f,0xfd,0x30]
+# W64-FAKE16: v_cmp_u_f16 vcc, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x11,0x7c,0xff,0x6f,0xfd,0x30]
 
 0xfa,0x04,0x30,0x7c,0x01,0x1b,0x00,0xff
 # W32: v_cmp_u_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x30,0x7c,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc_dpp8.txt
index d1ba8a4595086..ce68b77eadd8d 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc_dpp8.txt
@@ -5,12 +5,29 @@
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=W64,W64-FAKE16
 
 0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_class_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_class_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_class_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_class_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_class_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_class_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_class_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0x04,0xfb,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfb,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_class_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfb,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfb,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_class_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfb,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0xfb,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_class_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfb,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_class_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfb,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_class_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfb,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_class_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfb,0x7c,0xff,0x00,0x00,0x00]
+
 
 0xe9,0x04,0xfc,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_class_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfc,0x7c,0x01,0x77,0x39,0x05]
@@ -21,12 +38,29 @@
 # W64: v_cmp_class_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfd,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_eq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_eq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_eq_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_eq_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_eq_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0x04,0x05,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x05,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_eq_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x05,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x05,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x05,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0x05,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_eq_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x05,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_eq_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x05,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_eq_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x05,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_eq_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x05,0x7c,0xff,0x00,0x00,0x00]
+
 
 0xe9,0x04,0x24,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_eq_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x24,0x7c,0x01,0x77,0x39,0x05]
@@ -103,12 +137,29 @@
 # W64: v_cmp_eq_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x95,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_ge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_ge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_ge_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_ge_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_ge_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0x04,0x0d,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0d,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ge_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0d,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0d,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0d,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0x0d,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_ge_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0d,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_ge_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0d,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_ge_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0d,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_ge_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0d,0x7c,0xff,0x00,0x00,0x00]
+
 
 0xe9,0x04,0x2c,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_ge_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x2c,0x7c,0x01,0x77,0x39,0x05]
@@ -185,12 +236,29 @@
 # W64: v_cmp_ge_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x9d,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_gt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_gt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_gt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_gt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_gt_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0x04,0x09,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x09,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_gt_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x09,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x09,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x09,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0x09,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_gt_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x09,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_gt_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x09,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_gt_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x09,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_gt_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x09,0x7c,0xff,0x00,0x00,0x00]
+
 
 0xe9,0x04,0x28,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_gt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x28,0x7c,0x01,0x77,0x39,0x05]
@@ -267,12 +335,29 @@
 # W64: v_cmp_gt_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x99,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_le_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_le_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_le_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_le_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_le_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_le_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_le_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0x04,0x07,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x07,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_le_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x07,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x07,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_le_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x07,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0x07,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_le_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x07,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_le_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x07,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_le_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x07,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_le_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x07,0x7c,0xff,0x00,0x00,0x00]
+
 
 0xe9,0x04,0x26,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_le_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x26,0x7c,0x01,0x77,0x39,0x05]
@@ -349,12 +434,29 @@
 # W64: v_cmp_le_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x97,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_lg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_lg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_lg_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_lg_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_lg_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0x04,0x0b,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0b,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_lg_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0b,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0b,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0b,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0x0b,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_lg_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0b,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_lg_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0b,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_lg_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0b,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_lg_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0b,0x7c,0xff,0x00,0x00,0x00]
+
 
 0xe9,0x04,0x2a,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_lg_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x2a,0x7c,0x01,0x77,0x39,0x05]
@@ -529,12 +631,29 @@
 # W64: v_cmp_ne_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x9b,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_neq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_neq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_neq_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_neq_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_neq_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0x04,0x1b,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1b,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_neq_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1b,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1b,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1b,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0x1b,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_neq_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1b,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_neq_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1b,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_neq_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1b,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_neq_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1b,0x7c,0xff,0x00,0x00,0x00]
+
 
 0xe9,0x04,0x3a,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_neq_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x3a,0x7c,0x01,0x77,0x39,0x05]
@@ -545,12 +664,29 @@
 # W64: v_cmp_neq_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x3b,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_nge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_nge_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_nge_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nge_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0x04,0x13,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x13,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nge_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x13,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x13,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x13,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0x13,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_nge_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x13,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nge_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x13,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nge_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x13,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nge_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x13,0x7c,0xff,0x00,0x00,0x00]
+
 
 0xe9,0x04,0x32,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_nge_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x32,0x7c,0x01,0x77,0x39,0x05]
@@ -561,12 +697,29 @@
 # W64: v_cmp_nge_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x33,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_ngt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_ngt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_ngt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_ngt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0x04,0x17,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x17,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x17,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x17,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x17,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0x17,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_ngt_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x17,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_ngt_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x17,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_ngt_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x17,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_ngt_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x17,0x7c,0xff,0x00,0x00,0x00]
+
 
 0xe9,0x04,0x36,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_ngt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x36,0x7c,0x01,0x77,0x39,0x05]
@@ -577,12 +730,29 @@
 # W64: v_cmp_ngt_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x37,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_nle_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nle_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_nle_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_nle_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nle_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0x04,0x19,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x19,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nle_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x19,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x19,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x19,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0x19,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_nle_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x19,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nle_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x19,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nle_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x19,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nle_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x19,0x7c,0xff,0x00,0x00,0x00]
+
 
 0xe9,0x04,0x38,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_nle_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x38,0x7c,0x01,0x77,0x39,0x05]
@@ -593,12 +763,29 @@
 # W64: v_cmp_nle_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x39,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_nlg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nlg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_nlg_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_nlg_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0x04,0x15,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x15,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x15,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x15,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x15,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0x15,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_nlg_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x15,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nlg_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x15,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nlg_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x15,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nlg_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x15,0x7c,0xff,0x00,0x00,0x00]
+
 
 0xe9,0x04,0x34,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_nlg_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x34,0x7c,0x01,0x77,0x39,0x05]
@@ -609,12 +796,29 @@
 # W64: v_cmp_nlg_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x35,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_nlt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_nlt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_nlt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_nlt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0x04,0x1d,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1d,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1d,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1d,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x1d,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0x1d,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_nlt_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1d,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_nlt_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1d,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_nlt_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1d,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_nlt_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x1d,0x7c,0xff,0x00,0x00,0x00]
+
 
 0xe9,0x04,0x3c,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_nlt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x3c,0x7c,0x01,0x77,0x39,0x05]
@@ -625,12 +829,29 @@
 # W64: v_cmp_nlt_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x3d,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_o_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_o_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_o_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_o_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_o_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_o_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_o_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0x04,0x0f,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0f,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_o_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0f,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0f,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_o_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0f,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0x0f,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_o_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0f,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_o_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0f,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_o_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0f,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_o_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x0f,0x7c,0xff,0x00,0x00,0x00]
+
 
 0xe9,0x04,0x2e,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_o_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x2e,0x7c,0x01,0x77,0x39,0x05]
@@ -641,12 +862,29 @@
 # W64: v_cmp_o_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x2f,0x7c,0xff,0x00,0x00,0x00]
 
 0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05
-# W32: v_cmp_u_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
-# W64: v_cmp_u_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_u_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
 
 0xea,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00
-# W32: v_cmp_u_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00]
-# W64: v_cmp_u_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00]
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_u_f16 vcc, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_u_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00]
+
+0xe9,0x04,0x11,0x7c,0x81,0x77,0x39,0x05
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x11,0x7c,0x81,0x77,0x39,0x05]
+# W64-REAL16: v_cmp_u_f16 vcc, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x11,0x7c,0x81,0x77,0x39,0x05]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x11,0x7c,0x81,0x77,0x39,0x05]
+# W64-FAKE16: v_cmp_u_f16 vcc, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x11,0x7c,0x81,0x77,0x39,0x05]
+
+0xea,0xfe,0x11,0x7c,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cmp_u_f16 vcc_lo, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x11,0x7c,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cmp_u_f16 vcc, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x11,0x7c,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cmp_u_f16 vcc_lo, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x11,0x7c,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cmp_u_f16 vcc, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x11,0x7c,0xff,0x00,0x00,0x00]
+
 
 0xe9,0x04,0x30,0x7c,0x01,0x77,0x39,0x05
 # W32: v_cmp_u_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x30,0x7c,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/Disassembler/X86/avx10.2minmax-32.txt b/llvm/test/MC/Disassembler/X86/avx10.2minmax-32.txt
index 532128c19768b..96f9bdccbde19 100644
--- a/llvm/test/MC/Disassembler/X86/avx10.2minmax-32.txt
+++ b/llvm/test/MC/Disassembler/X86/avx10.2minmax-32.txt
@@ -1,112 +1,112 @@
 # RUN: llvm-mc --disassemble %s -triple=i386 | FileCheck %s --check-prefixes=ATT
 # RUN: llvm-mc --disassemble %s -triple=i386 --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
 
-# ATT:   vminmaxnepbf16 $123, %xmm4, %xmm3, %xmm2
-# INTEL: vminmaxnepbf16 xmm2, xmm3, xmm4, 123
+# ATT:   vminmaxbf16 $123, %xmm4, %xmm3, %xmm2
+# INTEL: vminmaxbf16 xmm2, xmm3, xmm4, 123
 0x62,0xf3,0x67,0x08,0x52,0xd4,0x7b
 
-# ATT:   vminmaxnepbf16 $123, %xmm4, %xmm3, %xmm2 {%k7}
-# INTEL: vminmaxnepbf16 xmm2 {k7}, xmm3, xmm4, 123
+# ATT:   vminmaxbf16 $123, %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vminmaxbf16 xmm2 {k7}, xmm3, xmm4, 123
 0x62,0xf3,0x67,0x0f,0x52,0xd4,0x7b
 
-# ATT:   vminmaxnepbf16 $123, %xmm4, %xmm3, %xmm2 {%k7} {z}
-# INTEL: vminmaxnepbf16 xmm2 {k7} {z}, xmm3, xmm4, 123
+# ATT:   vminmaxbf16 $123, %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vminmaxbf16 xmm2 {k7} {z}, xmm3, xmm4, 123
 0x62,0xf3,0x67,0x8f,0x52,0xd4,0x7b
 
-# ATT:   vminmaxnepbf16 $123, %zmm4, %zmm3, %zmm2
-# INTEL: vminmaxnepbf16 zmm2, zmm3, zmm4, 123
+# ATT:   vminmaxbf16 $123, %zmm4, %zmm3, %zmm2
+# INTEL: vminmaxbf16 zmm2, zmm3, zmm4, 123
 0x62,0xf3,0x67,0x48,0x52,0xd4,0x7b
 
-# ATT:   vminmaxnepbf16 $123, %zmm4, %zmm3, %zmm2 {%k7}
-# INTEL: vminmaxnepbf16 zmm2 {k7}, zmm3, zmm4, 123
+# ATT:   vminmaxbf16 $123, %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vminmaxbf16 zmm2 {k7}, zmm3, zmm4, 123
 0x62,0xf3,0x67,0x4f,0x52,0xd4,0x7b
 
-# ATT:   vminmaxnepbf16 $123, %zmm4, %zmm3, %zmm2 {%k7} {z}
-# INTEL: vminmaxnepbf16 zmm2 {k7} {z}, zmm3, zmm4, 123
+# ATT:   vminmaxbf16 $123, %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vminmaxbf16 zmm2 {k7} {z}, zmm3, zmm4, 123
 0x62,0xf3,0x67,0xcf,0x52,0xd4,0x7b
 
-# ATT:   vminmaxnepbf16 $123, %ymm4, %ymm3, %ymm2
-# INTEL: vminmaxnepbf16 ymm2, ymm3, ymm4, 123
+# ATT:   vminmaxbf16 $123, %ymm4, %ymm3, %ymm2
+# INTEL: vminmaxbf16 ymm2, ymm3, ymm4, 123
 0x62,0xf3,0x67,0x28,0x52,0xd4,0x7b
 
-# ATT:   vminmaxnepbf16 $123, %ymm4, %ymm3, %ymm2 {%k7}
-# INTEL: vminmaxnepbf16 ymm2 {k7}, ymm3, ymm4, 123
+# ATT:   vminmaxbf16 $123, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vminmaxbf16 ymm2 {k7}, ymm3, ymm4, 123
 0x62,0xf3,0x67,0x2f,0x52,0xd4,0x7b
 
-# ATT:   vminmaxnepbf16 $123, %ymm4, %ymm3, %ymm2 {%k7} {z}
-# INTEL: vminmaxnepbf16 ymm2 {k7} {z}, ymm3, ymm4, 123
+# ATT:   vminmaxbf16 $123, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vminmaxbf16 ymm2 {k7} {z}, ymm3, ymm4, 123
 0x62,0xf3,0x67,0xaf,0x52,0xd4,0x7b
 
-# ATT:   vminmaxnepbf16  $123, 268435456(%esp,%esi,8), %ymm3, %ymm2
-# INTEL: vminmaxnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456], 123
+# ATT:   vminmaxbf16  $123, 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vminmaxbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456], 123
 0x62,0xf3,0x67,0x28,0x52,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b
 
-# ATT:   vminmaxnepbf16  $123, 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
-# INTEL: vminmaxnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291], 123
+# ATT:   vminmaxbf16  $123, 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vminmaxbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291], 123
 0x62,0xf3,0x67,0x2f,0x52,0x94,0x87,0x23,0x01,0x00,0x00,0x7b
 
-# ATT:   vminmaxnepbf16  $123, (%eax){1to16}, %ymm3, %ymm2
-# INTEL: vminmaxnepbf16 ymm2, ymm3, word ptr [eax]{1to16}, 123
+# ATT:   vminmaxbf16  $123, (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vminmaxbf16 ymm2, ymm3, word ptr [eax]{1to16}, 123
 0x62,0xf3,0x67,0x38,0x52,0x10,0x7b
 
-# ATT:   vminmaxnepbf16  $123, -1024(,%ebp,2), %ymm3, %ymm2
-# INTEL: vminmaxnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024], 123
+# ATT:   vminmaxbf16  $123, -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vminmaxbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024], 123
 0x62,0xf3,0x67,0x28,0x52,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b
 
-# ATT:   vminmaxnepbf16  $123, 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
-# INTEL: vminmaxnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064], 123
+# ATT:   vminmaxbf16  $123, 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vminmaxbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064], 123
 0x62,0xf3,0x67,0xaf,0x52,0x51,0x7f,0x7b
 
-# ATT:   vminmaxnepbf16  $123, -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
-# INTEL: vminmaxnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}, 123
+# ATT:   vminmaxbf16  $123, -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vminmaxbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}, 123
 0x62,0xf3,0x67,0xbf,0x52,0x52,0x80,0x7b
 
-# ATT:   vminmaxnepbf16  $123, 268435456(%esp,%esi,8), %xmm3, %xmm2
-# INTEL: vminmaxnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456], 123
+# ATT:   vminmaxbf16  $123, 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vminmaxbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456], 123
 0x62,0xf3,0x67,0x08,0x52,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b
 
-# ATT:   vminmaxnepbf16  $123, 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
-# INTEL: vminmaxnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291], 123
+# ATT:   vminmaxbf16  $123, 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vminmaxbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291], 123
 0x62,0xf3,0x67,0x0f,0x52,0x94,0x87,0x23,0x01,0x00,0x00,0x7b
 
-# ATT:   vminmaxnepbf16  $123, (%eax){1to8}, %xmm3, %xmm2
-# INTEL: vminmaxnepbf16 xmm2, xmm3, word ptr [eax]{1to8}, 123
+# ATT:   vminmaxbf16  $123, (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vminmaxbf16 xmm2, xmm3, word ptr [eax]{1to8}, 123
 0x62,0xf3,0x67,0x18,0x52,0x10,0x7b
 
-# ATT:   vminmaxnepbf16  $123, -512(,%ebp,2), %xmm3, %xmm2
-# INTEL: vminmaxnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512], 123
+# ATT:   vminmaxbf16  $123, -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vminmaxbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512], 123
 0x62,0xf3,0x67,0x08,0x52,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b
 
-# ATT:   vminmaxnepbf16  $123, 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
-# INTEL: vminmaxnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032], 123
+# ATT:   vminmaxbf16  $123, 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vminmaxbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032], 123
 0x62,0xf3,0x67,0x8f,0x52,0x51,0x7f,0x7b
 
-# ATT:   vminmaxnepbf16  $123, -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
-# INTEL: vminmaxnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}, 123
+# ATT:   vminmaxbf16  $123, -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vminmaxbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}, 123
 0x62,0xf3,0x67,0x9f,0x52,0x52,0x80,0x7b
 
-# ATT:   vminmaxnepbf16  $123, 268435456(%esp,%esi,8), %zmm3, %zmm2
-# INTEL: vminmaxnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456], 123
+# ATT:   vminmaxbf16  $123, 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vminmaxbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456], 123
 0x62,0xf3,0x67,0x48,0x52,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b
 
-# ATT:   vminmaxnepbf16  $123, 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
-# INTEL: vminmaxnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291], 123
+# ATT:   vminmaxbf16  $123, 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vminmaxbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291], 123
 0x62,0xf3,0x67,0x4f,0x52,0x94,0x87,0x23,0x01,0x00,0x00,0x7b
 
-# ATT:   vminmaxnepbf16  $123, (%eax){1to32}, %zmm3, %zmm2
-# INTEL: vminmaxnepbf16 zmm2, zmm3, word ptr [eax]{1to32}, 123
+# ATT:   vminmaxbf16  $123, (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vminmaxbf16 zmm2, zmm3, word ptr [eax]{1to32}, 123
 0x62,0xf3,0x67,0x58,0x52,0x10,0x7b
 
-# ATT:   vminmaxnepbf16  $123, -2048(,%ebp,2), %zmm3, %zmm2
-# INTEL: vminmaxnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048], 123
+# ATT:   vminmaxbf16  $123, -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vminmaxbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048], 123
 0x62,0xf3,0x67,0x48,0x52,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b
 
-# ATT:   vminmaxnepbf16  $123, 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
-# INTEL: vminmaxnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128], 123
+# ATT:   vminmaxbf16  $123, 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vminmaxbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128], 123
 0x62,0xf3,0x67,0xcf,0x52,0x51,0x7f,0x7b
 
-# ATT:   vminmaxnepbf16  $123, -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
-# INTEL: vminmaxnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}, 123
+# ATT:   vminmaxbf16  $123, -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vminmaxbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}, 123
 0x62,0xf3,0x67,0xdf,0x52,0x52,0x80,0x7b
 
 # ATT:   vminmaxpd $123, %xmm4, %xmm3, %xmm2
diff --git a/llvm/test/MC/Disassembler/X86/avx10.2minmax-64.txt b/llvm/test/MC/Disassembler/X86/avx10.2minmax-64.txt
index fdb2f6877806e..af80fb1a3f48c 100644
--- a/llvm/test/MC/Disassembler/X86/avx10.2minmax-64.txt
+++ b/llvm/test/MC/Disassembler/X86/avx10.2minmax-64.txt
@@ -1,112 +1,112 @@
 # RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
 # RUN: llvm-mc --disassemble %s -triple=x86_64 --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
 
-# ATT:   vminmaxnepbf16 $123, %xmm24, %xmm23, %xmm22
-# INTEL: vminmaxnepbf16 xmm22, xmm23, xmm24, 123
+# ATT:   vminmaxbf16 $123, %xmm24, %xmm23, %xmm22
+# INTEL: vminmaxbf16 xmm22, xmm23, xmm24, 123
 0x62,0x83,0x47,0x00,0x52,0xf0,0x7b
 
-# ATT:   vminmaxnepbf16 $123, %xmm24, %xmm23, %xmm22 {%k7}
-# INTEL: vminmaxnepbf16 xmm22 {k7}, xmm23, xmm24, 123
+# ATT:   vminmaxbf16 $123, %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vminmaxbf16 xmm22 {k7}, xmm23, xmm24, 123
 0x62,0x83,0x47,0x07,0x52,0xf0,0x7b
 
-# ATT:   vminmaxnepbf16 $123, %xmm24, %xmm23, %xmm22 {%k7} {z}
-# INTEL: vminmaxnepbf16 xmm22 {k7} {z}, xmm23, xmm24, 123
+# ATT:   vminmaxbf16 $123, %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vminmaxbf16 xmm22 {k7} {z}, xmm23, xmm24, 123
 0x62,0x83,0x47,0x87,0x52,0xf0,0x7b
 
-# ATT:   vminmaxnepbf16 $123, %zmm24, %zmm23, %zmm22
-# INTEL: vminmaxnepbf16 zmm22, zmm23, zmm24, 123
+# ATT:   vminmaxbf16 $123, %zmm24, %zmm23, %zmm22
+# INTEL: vminmaxbf16 zmm22, zmm23, zmm24, 123
 0x62,0x83,0x47,0x40,0x52,0xf0,0x7b
 
-# ATT:   vminmaxnepbf16 $123, %zmm24, %zmm23, %zmm22 {%k7}
-# INTEL: vminmaxnepbf16 zmm22 {k7}, zmm23, zmm24, 123
+# ATT:   vminmaxbf16 $123, %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vminmaxbf16 zmm22 {k7}, zmm23, zmm24, 123
 0x62,0x83,0x47,0x47,0x52,0xf0,0x7b
 
-# ATT:   vminmaxnepbf16 $123, %zmm24, %zmm23, %zmm22 {%k7} {z}
-# INTEL: vminmaxnepbf16 zmm22 {k7} {z}, zmm23, zmm24, 123
+# ATT:   vminmaxbf16 $123, %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vminmaxbf16 zmm22 {k7} {z}, zmm23, zmm24, 123
 0x62,0x83,0x47,0xc7,0x52,0xf0,0x7b
 
-# ATT:   vminmaxnepbf16 $123, %ymm24, %ymm23, %ymm22
-# INTEL: vminmaxnepbf16 ymm22, ymm23, ymm24, 123
+# ATT:   vminmaxbf16 $123, %ymm24, %ymm23, %ymm22
+# INTEL: vminmaxbf16 ymm22, ymm23, ymm24, 123
 0x62,0x83,0x47,0x20,0x52,0xf0,0x7b
 
-# ATT:   vminmaxnepbf16 $123, %ymm24, %ymm23, %ymm22 {%k7}
-# INTEL: vminmaxnepbf16 ymm22 {k7}, ymm23, ymm24, 123
+# ATT:   vminmaxbf16 $123, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vminmaxbf16 ymm22 {k7}, ymm23, ymm24, 123
 0x62,0x83,0x47,0x27,0x52,0xf0,0x7b
 
-# ATT:   vminmaxnepbf16 $123, %ymm24, %ymm23, %ymm22 {%k7} {z}
-# INTEL: vminmaxnepbf16 ymm22 {k7} {z}, ymm23, ymm24, 123
+# ATT:   vminmaxbf16 $123, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vminmaxbf16 ymm22 {k7} {z}, ymm23, ymm24, 123
 0x62,0x83,0x47,0xa7,0x52,0xf0,0x7b
 
-# ATT:   vminmaxnepbf16  $123, 268435456(%rbp,%r14,8), %ymm23, %ymm22
-# INTEL: vminmaxnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456], 123
+# ATT:   vminmaxbf16  $123, 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vminmaxbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456], 123
 0x62,0xa3,0x47,0x20,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b
 
-# ATT:   vminmaxnepbf16  $123, 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
-# INTEL: vminmaxnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291], 123
+# ATT:   vminmaxbf16  $123, 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vminmaxbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291], 123
 0x62,0xc3,0x47,0x27,0x52,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b
 
-# ATT:   vminmaxnepbf16  $123, (%rip){1to16}, %ymm23, %ymm22
-# INTEL: vminmaxnepbf16 ymm22, ymm23, word ptr [rip]{1to16}, 123
+# ATT:   vminmaxbf16  $123, (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vminmaxbf16 ymm22, ymm23, word ptr [rip]{1to16}, 123
 0x62,0xe3,0x47,0x30,0x52,0x35,0x00,0x00,0x00,0x00,0x7b
 
-# ATT:   vminmaxnepbf16  $123, -1024(,%rbp,2), %ymm23, %ymm22
-# INTEL: vminmaxnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024], 123
+# ATT:   vminmaxbf16  $123, -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vminmaxbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024], 123
 0x62,0xe3,0x47,0x20,0x52,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b
 
-# ATT:   vminmaxnepbf16  $123, 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
-# INTEL: vminmaxnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064], 123
+# ATT:   vminmaxbf16  $123, 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vminmaxbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064], 123
 0x62,0xe3,0x47,0xa7,0x52,0x71,0x7f,0x7b
 
-# ATT:   vminmaxnepbf16  $123, -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
-# INTEL: vminmaxnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}, 123
+# ATT:   vminmaxbf16  $123, -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vminmaxbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}, 123
 0x62,0xe3,0x47,0xb7,0x52,0x72,0x80,0x7b
 
-# ATT:   vminmaxnepbf16  $123, 268435456(%rbp,%r14,8), %xmm23, %xmm22
-# INTEL: vminmaxnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456], 123
+# ATT:   vminmaxbf16  $123, 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vminmaxbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456], 123
 0x62,0xa3,0x47,0x00,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b
 
-# ATT:   vminmaxnepbf16  $123, 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
-# INTEL: vminmaxnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291], 123
+# ATT:   vminmaxbf16  $123, 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vminmaxbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291], 123
 0x62,0xc3,0x47,0x07,0x52,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b
 
-# ATT:   vminmaxnepbf16  $123, (%rip){1to8}, %xmm23, %xmm22
-# INTEL: vminmaxnepbf16 xmm22, xmm23, word ptr [rip]{1to8}, 123
+# ATT:   vminmaxbf16  $123, (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vminmaxbf16 xmm22, xmm23, word ptr [rip]{1to8}, 123
 0x62,0xe3,0x47,0x10,0x52,0x35,0x00,0x00,0x00,0x00,0x7b
 
-# ATT:   vminmaxnepbf16  $123, -512(,%rbp,2), %xmm23, %xmm22
-# INTEL: vminmaxnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512], 123
+# ATT:   vminmaxbf16  $123, -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vminmaxbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512], 123
 0x62,0xe3,0x47,0x00,0x52,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b
 
-# ATT:   vminmaxnepbf16  $123, 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
-# INTEL: vminmaxnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032], 123
+# ATT:   vminmaxbf16  $123, 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vminmaxbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032], 123
 0x62,0xe3,0x47,0x87,0x52,0x71,0x7f,0x7b
 
-# ATT:   vminmaxnepbf16  $123, -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
-# INTEL: vminmaxnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}, 123
+# ATT:   vminmaxbf16  $123, -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vminmaxbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}, 123
 0x62,0xe3,0x47,0x97,0x52,0x72,0x80,0x7b
 
-# ATT:   vminmaxnepbf16  $123, 268435456(%rbp,%r14,8), %zmm23, %zmm22
-# INTEL: vminmaxnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456], 123
+# ATT:   vminmaxbf16  $123, 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vminmaxbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456], 123
 0x62,0xa3,0x47,0x40,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b
 
-# ATT:   vminmaxnepbf16  $123, 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
-# INTEL: vminmaxnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291], 123
+# ATT:   vminmaxbf16  $123, 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vminmaxbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291], 123
 0x62,0xc3,0x47,0x47,0x52,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b
 
-# ATT:   vminmaxnepbf16  $123, (%rip){1to32}, %zmm23, %zmm22
-# INTEL: vminmaxnepbf16 zmm22, zmm23, word ptr [rip]{1to32}, 123
+# ATT:   vminmaxbf16  $123, (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vminmaxbf16 zmm22, zmm23, word ptr [rip]{1to32}, 123
 0x62,0xe3,0x47,0x50,0x52,0x35,0x00,0x00,0x00,0x00,0x7b
 
-# ATT:   vminmaxnepbf16  $123, -2048(,%rbp,2), %zmm23, %zmm22
-# INTEL: vminmaxnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048], 123
+# ATT:   vminmaxbf16  $123, -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vminmaxbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048], 123
 0x62,0xe3,0x47,0x40,0x52,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b
 
-# ATT:   vminmaxnepbf16  $123, 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
-# INTEL: vminmaxnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128], 123
+# ATT:   vminmaxbf16  $123, 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vminmaxbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128], 123
 0x62,0xe3,0x47,0xc7,0x52,0x71,0x7f,0x7b
 
-# ATT:   vminmaxnepbf16  $123, -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
-# INTEL: vminmaxnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}, 123
+# ATT:   vminmaxbf16  $123, -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vminmaxbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}, 123
 0x62,0xe3,0x47,0xd7,0x52,0x72,0x80,0x7b
 
 # ATT:   vminmaxpd $123, %xmm24, %xmm23, %xmm22
diff --git a/llvm/test/MC/X86/avx10.2minmax-32-att.s b/llvm/test/MC/X86/avx10.2minmax-32-att.s
index f6900899af28e..5e3b687543c42 100644
--- a/llvm/test/MC/X86/avx10.2minmax-32-att.s
+++ b/llvm/test/MC/X86/avx10.2minmax-32-att.s
@@ -1,112 +1,112 @@
 // RUN: llvm-mc -triple i386 --show-encoding %s | FileCheck %s
 
-// CHECK: vminmaxnepbf16 $123, %xmm4, %xmm3, %xmm2
+// CHECK: vminmaxbf16 $123, %xmm4, %xmm3, %xmm2
 // CHECK: encoding: [0x62,0xf3,0x67,0x08,0x52,0xd4,0x7b]
-          vminmaxnepbf16 $123, %xmm4, %xmm3, %xmm2
+          vminmaxbf16 $123, %xmm4, %xmm3, %xmm2
 
-// CHECK: vminmaxnepbf16 $123, %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: vminmaxbf16 $123, %xmm4, %xmm3, %xmm2 {%k7}
 // CHECK: encoding: [0x62,0xf3,0x67,0x0f,0x52,0xd4,0x7b]
-          vminmaxnepbf16 $123, %xmm4, %xmm3, %xmm2 {%k7}
+          vminmaxbf16 $123, %xmm4, %xmm3, %xmm2 {%k7}
 
-// CHECK: vminmaxnepbf16 $123, %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: vminmaxbf16 $123, %xmm4, %xmm3, %xmm2 {%k7} {z}
 // CHECK: encoding: [0x62,0xf3,0x67,0x8f,0x52,0xd4,0x7b]
-          vminmaxnepbf16 $123, %xmm4, %xmm3, %xmm2 {%k7} {z}
+          vminmaxbf16 $123, %xmm4, %xmm3, %xmm2 {%k7} {z}
 
-// CHECK: vminmaxnepbf16 $123, %zmm4, %zmm3, %zmm2
+// CHECK: vminmaxbf16 $123, %zmm4, %zmm3, %zmm2
 // CHECK: encoding: [0x62,0xf3,0x67,0x48,0x52,0xd4,0x7b]
-          vminmaxnepbf16 $123, %zmm4, %zmm3, %zmm2
+          vminmaxbf16 $123, %zmm4, %zmm3, %zmm2
 
-// CHECK: vminmaxnepbf16 $123, %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: vminmaxbf16 $123, %zmm4, %zmm3, %zmm2 {%k7}
 // CHECK: encoding: [0x62,0xf3,0x67,0x4f,0x52,0xd4,0x7b]
-          vminmaxnepbf16 $123, %zmm4, %zmm3, %zmm2 {%k7}
+          vminmaxbf16 $123, %zmm4, %zmm3, %zmm2 {%k7}
 
-// CHECK: vminmaxnepbf16 $123, %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: vminmaxbf16 $123, %zmm4, %zmm3, %zmm2 {%k7} {z}
 // CHECK: encoding: [0x62,0xf3,0x67,0xcf,0x52,0xd4,0x7b]
-          vminmaxnepbf16 $123, %zmm4, %zmm3, %zmm2 {%k7} {z}
+          vminmaxbf16 $123, %zmm4, %zmm3, %zmm2 {%k7} {z}
 
-// CHECK: vminmaxnepbf16 $123, %ymm4, %ymm3, %ymm2
+// CHECK: vminmaxbf16 $123, %ymm4, %ymm3, %ymm2
 // CHECK: encoding: [0x62,0xf3,0x67,0x28,0x52,0xd4,0x7b]
-          vminmaxnepbf16 $123, %ymm4, %ymm3, %ymm2
+          vminmaxbf16 $123, %ymm4, %ymm3, %ymm2
 
-// CHECK: vminmaxnepbf16 $123, %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: vminmaxbf16 $123, %ymm4, %ymm3, %ymm2 {%k7}
 // CHECK: encoding: [0x62,0xf3,0x67,0x2f,0x52,0xd4,0x7b]
-          vminmaxnepbf16 $123, %ymm4, %ymm3, %ymm2 {%k7}
+          vminmaxbf16 $123, %ymm4, %ymm3, %ymm2 {%k7}
 
-// CHECK: vminmaxnepbf16 $123, %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: vminmaxbf16 $123, %ymm4, %ymm3, %ymm2 {%k7} {z}
 // CHECK: encoding: [0x62,0xf3,0x67,0xaf,0x52,0xd4,0x7b]
-          vminmaxnepbf16 $123, %ymm4, %ymm3, %ymm2 {%k7} {z}
+          vminmaxbf16 $123, %ymm4, %ymm3, %ymm2 {%k7} {z}
 
-// CHECK: vminmaxnepbf16  $123, 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: vminmaxbf16  $123, 268435456(%esp,%esi,8), %ymm3, %ymm2
 // CHECK: encoding: [0x62,0xf3,0x67,0x28,0x52,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
-          vminmaxnepbf16  $123, 268435456(%esp,%esi,8), %ymm3, %ymm2
+          vminmaxbf16  $123, 268435456(%esp,%esi,8), %ymm3, %ymm2
 
-// CHECK: vminmaxnepbf16  $123, 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: vminmaxbf16  $123, 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
 // CHECK: encoding: [0x62,0xf3,0x67,0x2f,0x52,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
-          vminmaxnepbf16  $123, 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+          vminmaxbf16  $123, 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
 
-// CHECK: vminmaxnepbf16  $123, (%eax){1to16}, %ymm3, %ymm2
+// CHECK: vminmaxbf16  $123, (%eax){1to16}, %ymm3, %ymm2
 // CHECK: encoding: [0x62,0xf3,0x67,0x38,0x52,0x10,0x7b]
-          vminmaxnepbf16  $123, (%eax){1to16}, %ymm3, %ymm2
+          vminmaxbf16  $123, (%eax){1to16}, %ymm3, %ymm2
 
-// CHECK: vminmaxnepbf16  $123, -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: vminmaxbf16  $123, -1024(,%ebp,2), %ymm3, %ymm2
 // CHECK: encoding: [0x62,0xf3,0x67,0x28,0x52,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b]
-          vminmaxnepbf16  $123, -1024(,%ebp,2), %ymm3, %ymm2
+          vminmaxbf16  $123, -1024(,%ebp,2), %ymm3, %ymm2
 
-// CHECK: vminmaxnepbf16  $123, 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: vminmaxbf16  $123, 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
 // CHECK: encoding: [0x62,0xf3,0x67,0xaf,0x52,0x51,0x7f,0x7b]
-          vminmaxnepbf16  $123, 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+          vminmaxbf16  $123, 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
 
-// CHECK: vminmaxnepbf16  $123, -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: vminmaxbf16  $123, -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
 // CHECK: encoding: [0x62,0xf3,0x67,0xbf,0x52,0x52,0x80,0x7b]
-          vminmaxnepbf16  $123, -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+          vminmaxbf16  $123, -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
 
-// CHECK: vminmaxnepbf16  $123, 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: vminmaxbf16  $123, 268435456(%esp,%esi,8), %xmm3, %xmm2
 // CHECK: encoding: [0x62,0xf3,0x67,0x08,0x52,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
-          vminmaxnepbf16  $123, 268435456(%esp,%esi,8), %xmm3, %xmm2
+          vminmaxbf16  $123, 268435456(%esp,%esi,8), %xmm3, %xmm2
 
-// CHECK: vminmaxnepbf16  $123, 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: vminmaxbf16  $123, 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
 // CHECK: encoding: [0x62,0xf3,0x67,0x0f,0x52,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
-          vminmaxnepbf16  $123, 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+          vminmaxbf16  $123, 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
 
-// CHECK: vminmaxnepbf16  $123, (%eax){1to8}, %xmm3, %xmm2
+// CHECK: vminmaxbf16  $123, (%eax){1to8}, %xmm3, %xmm2
 // CHECK: encoding: [0x62,0xf3,0x67,0x18,0x52,0x10,0x7b]
-          vminmaxnepbf16  $123, (%eax){1to8}, %xmm3, %xmm2
+          vminmaxbf16  $123, (%eax){1to8}, %xmm3, %xmm2
 
-// CHECK: vminmaxnepbf16  $123, -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: vminmaxbf16  $123, -512(,%ebp,2), %xmm3, %xmm2
 // CHECK: encoding: [0x62,0xf3,0x67,0x08,0x52,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b]
-          vminmaxnepbf16  $123, -512(,%ebp,2), %xmm3, %xmm2
+          vminmaxbf16  $123, -512(,%ebp,2), %xmm3, %xmm2
 
-// CHECK: vminmaxnepbf16  $123, 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: vminmaxbf16  $123, 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
 // CHECK: encoding: [0x62,0xf3,0x67,0x8f,0x52,0x51,0x7f,0x7b]
-          vminmaxnepbf16  $123, 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+          vminmaxbf16  $123, 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
 
-// CHECK: vminmaxnepbf16  $123, -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: vminmaxbf16  $123, -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
 // CHECK: encoding: [0x62,0xf3,0x67,0x9f,0x52,0x52,0x80,0x7b]
-          vminmaxnepbf16  $123, -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+          vminmaxbf16  $123, -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
 
-// CHECK: vminmaxnepbf16  $123, 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: vminmaxbf16  $123, 268435456(%esp,%esi,8), %zmm3, %zmm2
 // CHECK: encoding: [0x62,0xf3,0x67,0x48,0x52,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
-          vminmaxnepbf16  $123, 268435456(%esp,%esi,8), %zmm3, %zmm2
+          vminmaxbf16  $123, 268435456(%esp,%esi,8), %zmm3, %zmm2
 
-// CHECK: vminmaxnepbf16  $123, 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: vminmaxbf16  $123, 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
 // CHECK: encoding: [0x62,0xf3,0x67,0x4f,0x52,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
-          vminmaxnepbf16  $123, 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+          vminmaxbf16  $123, 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
 
-// CHECK: vminmaxnepbf16  $123, (%eax){1to32}, %zmm3, %zmm2
+// CHECK: vminmaxbf16  $123, (%eax){1to32}, %zmm3, %zmm2
 // CHECK: encoding: [0x62,0xf3,0x67,0x58,0x52,0x10,0x7b]
-          vminmaxnepbf16  $123, (%eax){1to32}, %zmm3, %zmm2
+          vminmaxbf16  $123, (%eax){1to32}, %zmm3, %zmm2
 
-// CHECK: vminmaxnepbf16  $123, -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: vminmaxbf16  $123, -2048(,%ebp,2), %zmm3, %zmm2
 // CHECK: encoding: [0x62,0xf3,0x67,0x48,0x52,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b]
-          vminmaxnepbf16  $123, -2048(,%ebp,2), %zmm3, %zmm2
+          vminmaxbf16  $123, -2048(,%ebp,2), %zmm3, %zmm2
 
-// CHECK: vminmaxnepbf16  $123, 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: vminmaxbf16  $123, 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
 // CHECK: encoding: [0x62,0xf3,0x67,0xcf,0x52,0x51,0x7f,0x7b]
-          vminmaxnepbf16  $123, 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+          vminmaxbf16  $123, 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
 
-// CHECK: vminmaxnepbf16  $123, -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: vminmaxbf16  $123, -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
 // CHECK: encoding: [0x62,0xf3,0x67,0xdf,0x52,0x52,0x80,0x7b]
-          vminmaxnepbf16  $123, -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+          vminmaxbf16  $123, -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
 
 // CHECK: vminmaxpd $123, %xmm4, %xmm3, %xmm2
 // CHECK: encoding: [0x62,0xf3,0xe5,0x08,0x52,0xd4,0x7b]
diff --git a/llvm/test/MC/X86/avx10.2minmax-32-intel.s b/llvm/test/MC/X86/avx10.2minmax-32-intel.s
index 1d668ee15a409..c237b09e15954 100644
--- a/llvm/test/MC/X86/avx10.2minmax-32-intel.s
+++ b/llvm/test/MC/X86/avx10.2minmax-32-intel.s
@@ -1,112 +1,112 @@
 // RUN: llvm-mc -triple i386 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
 
-// CHECK: vminmaxnepbf16 xmm2, xmm3, xmm4, 123
+// CHECK: vminmaxbf16 xmm2, xmm3, xmm4, 123
 // CHECK: encoding: [0x62,0xf3,0x67,0x08,0x52,0xd4,0x7b]
-          vminmaxnepbf16 xmm2, xmm3, xmm4, 123
+          vminmaxbf16 xmm2, xmm3, xmm4, 123
 
-// CHECK: vminmaxnepbf16 xmm2 {k7}, xmm3, xmm4, 123
+// CHECK: vminmaxbf16 xmm2 {k7}, xmm3, xmm4, 123
 // CHECK: encoding: [0x62,0xf3,0x67,0x0f,0x52,0xd4,0x7b]
-          vminmaxnepbf16 xmm2 {k7}, xmm3, xmm4, 123
+          vminmaxbf16 xmm2 {k7}, xmm3, xmm4, 123
 
-// CHECK: vminmaxnepbf16 xmm2 {k7} {z}, xmm3, xmm4, 123
+// CHECK: vminmaxbf16 xmm2 {k7} {z}, xmm3, xmm4, 123
 // CHECK: encoding: [0x62,0xf3,0x67,0x8f,0x52,0xd4,0x7b]
-          vminmaxnepbf16 xmm2 {k7} {z}, xmm3, xmm4, 123
+          vminmaxbf16 xmm2 {k7} {z}, xmm3, xmm4, 123
 
-// CHECK: vminmaxnepbf16 zmm2, zmm3, zmm4, 123
+// CHECK: vminmaxbf16 zmm2, zmm3, zmm4, 123
 // CHECK: encoding: [0x62,0xf3,0x67,0x48,0x52,0xd4,0x7b]
-          vminmaxnepbf16 zmm2, zmm3, zmm4, 123
+          vminmaxbf16 zmm2, zmm3, zmm4, 123
 
-// CHECK: vminmaxnepbf16 zmm2 {k7}, zmm3, zmm4, 123
+// CHECK: vminmaxbf16 zmm2 {k7}, zmm3, zmm4, 123
 // CHECK: encoding: [0x62,0xf3,0x67,0x4f,0x52,0xd4,0x7b]
-          vminmaxnepbf16 zmm2 {k7}, zmm3, zmm4, 123
+          vminmaxbf16 zmm2 {k7}, zmm3, zmm4, 123
 
-// CHECK: vminmaxnepbf16 zmm2 {k7} {z}, zmm3, zmm4, 123
+// CHECK: vminmaxbf16 zmm2 {k7} {z}, zmm3, zmm4, 123
 // CHECK: encoding: [0x62,0xf3,0x67,0xcf,0x52,0xd4,0x7b]
-          vminmaxnepbf16 zmm2 {k7} {z}, zmm3, zmm4, 123
+          vminmaxbf16 zmm2 {k7} {z}, zmm3, zmm4, 123
 
-// CHECK: vminmaxnepbf16 ymm2, ymm3, ymm4, 123
+// CHECK: vminmaxbf16 ymm2, ymm3, ymm4, 123
 // CHECK: encoding: [0x62,0xf3,0x67,0x28,0x52,0xd4,0x7b]
-          vminmaxnepbf16 ymm2, ymm3, ymm4, 123
+          vminmaxbf16 ymm2, ymm3, ymm4, 123
 
-// CHECK: vminmaxnepbf16 ymm2 {k7}, ymm3, ymm4, 123
+// CHECK: vminmaxbf16 ymm2 {k7}, ymm3, ymm4, 123
 // CHECK: encoding: [0x62,0xf3,0x67,0x2f,0x52,0xd4,0x7b]
-          vminmaxnepbf16 ymm2 {k7}, ymm3, ymm4, 123
+          vminmaxbf16 ymm2 {k7}, ymm3, ymm4, 123
 
-// CHECK: vminmaxnepbf16 ymm2 {k7} {z}, ymm3, ymm4, 123
+// CHECK: vminmaxbf16 ymm2 {k7} {z}, ymm3, ymm4, 123
 // CHECK: encoding: [0x62,0xf3,0x67,0xaf,0x52,0xd4,0x7b]
-          vminmaxnepbf16 ymm2 {k7} {z}, ymm3, ymm4, 123
+          vminmaxbf16 ymm2 {k7} {z}, ymm3, ymm4, 123
 
-// CHECK: vminmaxnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: vminmaxbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456], 123
 // CHECK: encoding: [0x62,0xf3,0x67,0x28,0x52,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
-          vminmaxnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456], 123
+          vminmaxbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456], 123
 
-// CHECK: vminmaxnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291], 123
+// CHECK: vminmaxbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291], 123
 // CHECK: encoding: [0x62,0xf3,0x67,0x2f,0x52,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
-          vminmaxnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291], 123
+          vminmaxbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291], 123
 
-// CHECK: vminmaxnepbf16 ymm2, ymm3, word ptr [eax]{1to16}, 123
+// CHECK: vminmaxbf16 ymm2, ymm3, word ptr [eax]{1to16}, 123
 // CHECK: encoding: [0x62,0xf3,0x67,0x38,0x52,0x10,0x7b]
-          vminmaxnepbf16 ymm2, ymm3, word ptr [eax]{1to16}, 123
+          vminmaxbf16 ymm2, ymm3, word ptr [eax]{1to16}, 123
 
-// CHECK: vminmaxnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024], 123
+// CHECK: vminmaxbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024], 123
 // CHECK: encoding: [0x62,0xf3,0x67,0x28,0x52,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b]
-          vminmaxnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024], 123
+          vminmaxbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024], 123
 
-// CHECK: vminmaxnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064], 123
+// CHECK: vminmaxbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064], 123
 // CHECK: encoding: [0x62,0xf3,0x67,0xaf,0x52,0x51,0x7f,0x7b]
-          vminmaxnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064], 123
+          vminmaxbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064], 123
 
-// CHECK: vminmaxnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}, 123
+// CHECK: vminmaxbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}, 123
 // CHECK: encoding: [0x62,0xf3,0x67,0xbf,0x52,0x52,0x80,0x7b]
-          vminmaxnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}, 123
+          vminmaxbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}, 123
 
-// CHECK: vminmaxnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: vminmaxbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456], 123
 // CHECK: encoding: [0x62,0xf3,0x67,0x08,0x52,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
-          vminmaxnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456], 123
+          vminmaxbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456], 123
 
-// CHECK: vminmaxnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291], 123
+// CHECK: vminmaxbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291], 123
 // CHECK: encoding: [0x62,0xf3,0x67,0x0f,0x52,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
-          vminmaxnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291], 123
+          vminmaxbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291], 123
 
-// CHECK: vminmaxnepbf16 xmm2, xmm3, word ptr [eax]{1to8}, 123
+// CHECK: vminmaxbf16 xmm2, xmm3, word ptr [eax]{1to8}, 123
 // CHECK: encoding: [0x62,0xf3,0x67,0x18,0x52,0x10,0x7b]
-          vminmaxnepbf16 xmm2, xmm3, word ptr [eax]{1to8}, 123
+          vminmaxbf16 xmm2, xmm3, word ptr [eax]{1to8}, 123
 
-// CHECK: vminmaxnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512], 123
+// CHECK: vminmaxbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512], 123
 // CHECK: encoding: [0x62,0xf3,0x67,0x08,0x52,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b]
-          vminmaxnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512], 123
+          vminmaxbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512], 123
 
-// CHECK: vminmaxnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032], 123
+// CHECK: vminmaxbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032], 123
 // CHECK: encoding: [0x62,0xf3,0x67,0x8f,0x52,0x51,0x7f,0x7b]
-          vminmaxnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032], 123
+          vminmaxbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032], 123
 
-// CHECK: vminmaxnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}, 123
+// CHECK: vminmaxbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}, 123
 // CHECK: encoding: [0x62,0xf3,0x67,0x9f,0x52,0x52,0x80,0x7b]
-          vminmaxnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}, 123
+          vminmaxbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}, 123
 
-// CHECK: vminmaxnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: vminmaxbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456], 123
 // CHECK: encoding: [0x62,0xf3,0x67,0x48,0x52,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
-          vminmaxnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456], 123
+          vminmaxbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456], 123
 
-// CHECK: vminmaxnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291], 123
+// CHECK: vminmaxbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291], 123
 // CHECK: encoding: [0x62,0xf3,0x67,0x4f,0x52,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
-          vminmaxnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291], 123
+          vminmaxbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291], 123
 
-// CHECK: vminmaxnepbf16 zmm2, zmm3, word ptr [eax]{1to32}, 123
+// CHECK: vminmaxbf16 zmm2, zmm3, word ptr [eax]{1to32}, 123
 // CHECK: encoding: [0x62,0xf3,0x67,0x58,0x52,0x10,0x7b]
-          vminmaxnepbf16 zmm2, zmm3, word ptr [eax]{1to32}, 123
+          vminmaxbf16 zmm2, zmm3, word ptr [eax]{1to32}, 123
 
-// CHECK: vminmaxnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048], 123
+// CHECK: vminmaxbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048], 123
 // CHECK: encoding: [0x62,0xf3,0x67,0x48,0x52,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b]
-          vminmaxnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048], 123
+          vminmaxbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048], 123
 
-// CHECK: vminmaxnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128], 123
+// CHECK: vminmaxbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128], 123
 // CHECK: encoding: [0x62,0xf3,0x67,0xcf,0x52,0x51,0x7f,0x7b]
-          vminmaxnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128], 123
+          vminmaxbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128], 123
 
-// CHECK: vminmaxnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}, 123
+// CHECK: vminmaxbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}, 123
 // CHECK: encoding: [0x62,0xf3,0x67,0xdf,0x52,0x52,0x80,0x7b]
-          vminmaxnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}, 123
+          vminmaxbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}, 123
 
 // CHECK: vminmaxpd xmm2, xmm3, xmm4, 123
 // CHECK: encoding: [0x62,0xf3,0xe5,0x08,0x52,0xd4,0x7b]
diff --git a/llvm/test/MC/X86/avx10.2minmax-64-att.s b/llvm/test/MC/X86/avx10.2minmax-64-att.s
index f58b4a51b995f..ad237d1fb2596 100644
--- a/llvm/test/MC/X86/avx10.2minmax-64-att.s
+++ b/llvm/test/MC/X86/avx10.2minmax-64-att.s
@@ -1,112 +1,112 @@
 // RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s
 
-// CHECK: vminmaxnepbf16 $123, %xmm24, %xmm23, %xmm22
+// CHECK: vminmaxbf16 $123, %xmm24, %xmm23, %xmm22
 // CHECK: encoding: [0x62,0x83,0x47,0x00,0x52,0xf0,0x7b]
-          vminmaxnepbf16 $123, %xmm24, %xmm23, %xmm22
+          vminmaxbf16 $123, %xmm24, %xmm23, %xmm22
 
-// CHECK: vminmaxnepbf16 $123, %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: vminmaxbf16 $123, %xmm24, %xmm23, %xmm22 {%k7}
 // CHECK: encoding: [0x62,0x83,0x47,0x07,0x52,0xf0,0x7b]
-          vminmaxnepbf16 $123, %xmm24, %xmm23, %xmm22 {%k7}
+          vminmaxbf16 $123, %xmm24, %xmm23, %xmm22 {%k7}
 
-// CHECK: vminmaxnepbf16 $123, %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: vminmaxbf16 $123, %xmm24, %xmm23, %xmm22 {%k7} {z}
 // CHECK: encoding: [0x62,0x83,0x47,0x87,0x52,0xf0,0x7b]
-          vminmaxnepbf16 $123, %xmm24, %xmm23, %xmm22 {%k7} {z}
+          vminmaxbf16 $123, %xmm24, %xmm23, %xmm22 {%k7} {z}
 
-// CHECK: vminmaxnepbf16 $123, %zmm24, %zmm23, %zmm22
+// CHECK: vminmaxbf16 $123, %zmm24, %zmm23, %zmm22
 // CHECK: encoding: [0x62,0x83,0x47,0x40,0x52,0xf0,0x7b]
-          vminmaxnepbf16 $123, %zmm24, %zmm23, %zmm22
+          vminmaxbf16 $123, %zmm24, %zmm23, %zmm22
 
-// CHECK: vminmaxnepbf16 $123, %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: vminmaxbf16 $123, %zmm24, %zmm23, %zmm22 {%k7}
 // CHECK: encoding: [0x62,0x83,0x47,0x47,0x52,0xf0,0x7b]
-          vminmaxnepbf16 $123, %zmm24, %zmm23, %zmm22 {%k7}
+          vminmaxbf16 $123, %zmm24, %zmm23, %zmm22 {%k7}
 
-// CHECK: vminmaxnepbf16 $123, %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: vminmaxbf16 $123, %zmm24, %zmm23, %zmm22 {%k7} {z}
 // CHECK: encoding: [0x62,0x83,0x47,0xc7,0x52,0xf0,0x7b]
-          vminmaxnepbf16 $123, %zmm24, %zmm23, %zmm22 {%k7} {z}
+          vminmaxbf16 $123, %zmm24, %zmm23, %zmm22 {%k7} {z}
 
-// CHECK: vminmaxnepbf16 $123, %ymm24, %ymm23, %ymm22
+// CHECK: vminmaxbf16 $123, %ymm24, %ymm23, %ymm22
 // CHECK: encoding: [0x62,0x83,0x47,0x20,0x52,0xf0,0x7b]
-          vminmaxnepbf16 $123, %ymm24, %ymm23, %ymm22
+          vminmaxbf16 $123, %ymm24, %ymm23, %ymm22
 
-// CHECK: vminmaxnepbf16 $123, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: vminmaxbf16 $123, %ymm24, %ymm23, %ymm22 {%k7}
 // CHECK: encoding: [0x62,0x83,0x47,0x27,0x52,0xf0,0x7b]
-          vminmaxnepbf16 $123, %ymm24, %ymm23, %ymm22 {%k7}
+          vminmaxbf16 $123, %ymm24, %ymm23, %ymm22 {%k7}
 
-// CHECK: vminmaxnepbf16 $123, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: vminmaxbf16 $123, %ymm24, %ymm23, %ymm22 {%k7} {z}
 // CHECK: encoding: [0x62,0x83,0x47,0xa7,0x52,0xf0,0x7b]
-          vminmaxnepbf16 $123, %ymm24, %ymm23, %ymm22 {%k7} {z}
+          vminmaxbf16 $123, %ymm24, %ymm23, %ymm22 {%k7} {z}
 
-// CHECK: vminmaxnepbf16  $123, 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: vminmaxbf16  $123, 268435456(%rbp,%r14,8), %ymm23, %ymm22
 // CHECK: encoding: [0x62,0xa3,0x47,0x20,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
-          vminmaxnepbf16  $123, 268435456(%rbp,%r14,8), %ymm23, %ymm22
+          vminmaxbf16  $123, 268435456(%rbp,%r14,8), %ymm23, %ymm22
 
-// CHECK: vminmaxnepbf16  $123, 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: vminmaxbf16  $123, 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
 // CHECK: encoding: [0x62,0xc3,0x47,0x27,0x52,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
-          vminmaxnepbf16  $123, 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+          vminmaxbf16  $123, 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
 
-// CHECK: vminmaxnepbf16  $123, (%rip){1to16}, %ymm23, %ymm22
+// CHECK: vminmaxbf16  $123, (%rip){1to16}, %ymm23, %ymm22
 // CHECK: encoding: [0x62,0xe3,0x47,0x30,0x52,0x35,0x00,0x00,0x00,0x00,0x7b]
-          vminmaxnepbf16  $123, (%rip){1to16}, %ymm23, %ymm22
+          vminmaxbf16  $123, (%rip){1to16}, %ymm23, %ymm22
 
-// CHECK: vminmaxnepbf16  $123, -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: vminmaxbf16  $123, -1024(,%rbp,2), %ymm23, %ymm22
 // CHECK: encoding: [0x62,0xe3,0x47,0x20,0x52,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b]
-          vminmaxnepbf16  $123, -1024(,%rbp,2), %ymm23, %ymm22
+          vminmaxbf16  $123, -1024(,%rbp,2), %ymm23, %ymm22
 
-// CHECK: vminmaxnepbf16  $123, 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: vminmaxbf16  $123, 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
 // CHECK: encoding: [0x62,0xe3,0x47,0xa7,0x52,0x71,0x7f,0x7b]
-          vminmaxnepbf16  $123, 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+          vminmaxbf16  $123, 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
 
-// CHECK: vminmaxnepbf16  $123, -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: vminmaxbf16  $123, -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
 // CHECK: encoding: [0x62,0xe3,0x47,0xb7,0x52,0x72,0x80,0x7b]
-          vminmaxnepbf16  $123, -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+          vminmaxbf16  $123, -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
 
-// CHECK: vminmaxnepbf16  $123, 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: vminmaxbf16  $123, 268435456(%rbp,%r14,8), %xmm23, %xmm22
 // CHECK: encoding: [0x62,0xa3,0x47,0x00,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
-          vminmaxnepbf16  $123, 268435456(%rbp,%r14,8), %xmm23, %xmm22
+          vminmaxbf16  $123, 268435456(%rbp,%r14,8), %xmm23, %xmm22
 
-// CHECK: vminmaxnepbf16  $123, 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: vminmaxbf16  $123, 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
 // CHECK: encoding: [0x62,0xc3,0x47,0x07,0x52,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
-          vminmaxnepbf16  $123, 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+          vminmaxbf16  $123, 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
 
-// CHECK: vminmaxnepbf16  $123, (%rip){1to8}, %xmm23, %xmm22
+// CHECK: vminmaxbf16  $123, (%rip){1to8}, %xmm23, %xmm22
 // CHECK: encoding: [0x62,0xe3,0x47,0x10,0x52,0x35,0x00,0x00,0x00,0x00,0x7b]
-          vminmaxnepbf16  $123, (%rip){1to8}, %xmm23, %xmm22
+          vminmaxbf16  $123, (%rip){1to8}, %xmm23, %xmm22
 
-// CHECK: vminmaxnepbf16  $123, -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: vminmaxbf16  $123, -512(,%rbp,2), %xmm23, %xmm22
 // CHECK: encoding: [0x62,0xe3,0x47,0x00,0x52,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b]
-          vminmaxnepbf16  $123, -512(,%rbp,2), %xmm23, %xmm22
+          vminmaxbf16  $123, -512(,%rbp,2), %xmm23, %xmm22
 
-// CHECK: vminmaxnepbf16  $123, 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: vminmaxbf16  $123, 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
 // CHECK: encoding: [0x62,0xe3,0x47,0x87,0x52,0x71,0x7f,0x7b]
-          vminmaxnepbf16  $123, 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+          vminmaxbf16  $123, 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
 
-// CHECK: vminmaxnepbf16  $123, -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: vminmaxbf16  $123, -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
 // CHECK: encoding: [0x62,0xe3,0x47,0x97,0x52,0x72,0x80,0x7b]
-          vminmaxnepbf16  $123, -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+          vminmaxbf16  $123, -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
 
-// CHECK: vminmaxnepbf16  $123, 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: vminmaxbf16  $123, 268435456(%rbp,%r14,8), %zmm23, %zmm22
 // CHECK: encoding: [0x62,0xa3,0x47,0x40,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
-          vminmaxnepbf16  $123, 268435456(%rbp,%r14,8), %zmm23, %zmm22
+          vminmaxbf16  $123, 268435456(%rbp,%r14,8), %zmm23, %zmm22
 
-// CHECK: vminmaxnepbf16  $123, 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: vminmaxbf16  $123, 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
 // CHECK: encoding: [0x62,0xc3,0x47,0x47,0x52,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
-          vminmaxnepbf16  $123, 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+          vminmaxbf16  $123, 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
 
-// CHECK: vminmaxnepbf16  $123, (%rip){1to32}, %zmm23, %zmm22
+// CHECK: vminmaxbf16  $123, (%rip){1to32}, %zmm23, %zmm22
 // CHECK: encoding: [0x62,0xe3,0x47,0x50,0x52,0x35,0x00,0x00,0x00,0x00,0x7b]
-          vminmaxnepbf16  $123, (%rip){1to32}, %zmm23, %zmm22
+          vminmaxbf16  $123, (%rip){1to32}, %zmm23, %zmm22
 
-// CHECK: vminmaxnepbf16  $123, -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: vminmaxbf16  $123, -2048(,%rbp,2), %zmm23, %zmm22
 // CHECK: encoding: [0x62,0xe3,0x47,0x40,0x52,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b]
-          vminmaxnepbf16  $123, -2048(,%rbp,2), %zmm23, %zmm22
+          vminmaxbf16  $123, -2048(,%rbp,2), %zmm23, %zmm22
 
-// CHECK: vminmaxnepbf16  $123, 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: vminmaxbf16  $123, 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
 // CHECK: encoding: [0x62,0xe3,0x47,0xc7,0x52,0x71,0x7f,0x7b]
-          vminmaxnepbf16  $123, 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+          vminmaxbf16  $123, 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
 
-// CHECK: vminmaxnepbf16  $123, -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: vminmaxbf16  $123, -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
 // CHECK: encoding: [0x62,0xe3,0x47,0xd7,0x52,0x72,0x80,0x7b]
-          vminmaxnepbf16  $123, -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+          vminmaxbf16  $123, -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
 
 // CHECK: vminmaxpd $123, %xmm24, %xmm23, %xmm22
 // CHECK: encoding: [0x62,0x83,0xc5,0x00,0x52,0xf0,0x7b]
diff --git a/llvm/test/MC/X86/avx10.2minmax-64-intel.s b/llvm/test/MC/X86/avx10.2minmax-64-intel.s
index 8630d7f96165c..81c59332fac84 100644
--- a/llvm/test/MC/X86/avx10.2minmax-64-intel.s
+++ b/llvm/test/MC/X86/avx10.2minmax-64-intel.s
@@ -1,112 +1,112 @@
 // RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
 
-// CHECK: vminmaxnepbf16 xmm22, xmm23, xmm24, 123
+// CHECK: vminmaxbf16 xmm22, xmm23, xmm24, 123
 // CHECK: encoding: [0x62,0x83,0x47,0x00,0x52,0xf0,0x7b]
-          vminmaxnepbf16 xmm22, xmm23, xmm24, 123
+          vminmaxbf16 xmm22, xmm23, xmm24, 123
 
-// CHECK: vminmaxnepbf16 xmm22 {k7}, xmm23, xmm24, 123
+// CHECK: vminmaxbf16 xmm22 {k7}, xmm23, xmm24, 123
 // CHECK: encoding: [0x62,0x83,0x47,0x07,0x52,0xf0,0x7b]
-          vminmaxnepbf16 xmm22 {k7}, xmm23, xmm24, 123
+          vminmaxbf16 xmm22 {k7}, xmm23, xmm24, 123
 
-// CHECK: vminmaxnepbf16 xmm22 {k7} {z}, xmm23, xmm24, 123
+// CHECK: vminmaxbf16 xmm22 {k7} {z}, xmm23, xmm24, 123
 // CHECK: encoding: [0x62,0x83,0x47,0x87,0x52,0xf0,0x7b]
-          vminmaxnepbf16 xmm22 {k7} {z}, xmm23, xmm24, 123
+          vminmaxbf16 xmm22 {k7} {z}, xmm23, xmm24, 123
 
-// CHECK: vminmaxnepbf16 zmm22, zmm23, zmm24, 123
+// CHECK: vminmaxbf16 zmm22, zmm23, zmm24, 123
 // CHECK: encoding: [0x62,0x83,0x47,0x40,0x52,0xf0,0x7b]
-          vminmaxnepbf16 zmm22, zmm23, zmm24, 123
+          vminmaxbf16 zmm22, zmm23, zmm24, 123
 
-// CHECK: vminmaxnepbf16 zmm22 {k7}, zmm23, zmm24, 123
+// CHECK: vminmaxbf16 zmm22 {k7}, zmm23, zmm24, 123
 // CHECK: encoding: [0x62,0x83,0x47,0x47,0x52,0xf0,0x7b]
-          vminmaxnepbf16 zmm22 {k7}, zmm23, zmm24, 123
+          vminmaxbf16 zmm22 {k7}, zmm23, zmm24, 123
 
-// CHECK: vminmaxnepbf16 zmm22 {k7} {z}, zmm23, zmm24, 123
+// CHECK: vminmaxbf16 zmm22 {k7} {z}, zmm23, zmm24, 123
 // CHECK: encoding: [0x62,0x83,0x47,0xc7,0x52,0xf0,0x7b]
-          vminmaxnepbf16 zmm22 {k7} {z}, zmm23, zmm24, 123
+          vminmaxbf16 zmm22 {k7} {z}, zmm23, zmm24, 123
 
-// CHECK: vminmaxnepbf16 ymm22, ymm23, ymm24, 123
+// CHECK: vminmaxbf16 ymm22, ymm23, ymm24, 123
 // CHECK: encoding: [0x62,0x83,0x47,0x20,0x52,0xf0,0x7b]
-          vminmaxnepbf16 ymm22, ymm23, ymm24, 123
+          vminmaxbf16 ymm22, ymm23, ymm24, 123
 
-// CHECK: vminmaxnepbf16 ymm22 {k7}, ymm23, ymm24, 123
+// CHECK: vminmaxbf16 ymm22 {k7}, ymm23, ymm24, 123
 // CHECK: encoding: [0x62,0x83,0x47,0x27,0x52,0xf0,0x7b]
-          vminmaxnepbf16 ymm22 {k7}, ymm23, ymm24, 123
+          vminmaxbf16 ymm22 {k7}, ymm23, ymm24, 123
 
-// CHECK: vminmaxnepbf16 ymm22 {k7} {z}, ymm23, ymm24, 123
+// CHECK: vminmaxbf16 ymm22 {k7} {z}, ymm23, ymm24, 123
 // CHECK: encoding: [0x62,0x83,0x47,0xa7,0x52,0xf0,0x7b]
-          vminmaxnepbf16 ymm22 {k7} {z}, ymm23, ymm24, 123
+          vminmaxbf16 ymm22 {k7} {z}, ymm23, ymm24, 123
 
-// CHECK: vminmaxnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456], 123
+// CHECK: vminmaxbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456], 123
 // CHECK: encoding: [0x62,0xa3,0x47,0x20,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
-          vminmaxnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456], 123
+          vminmaxbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456], 123
 
-// CHECK: vminmaxnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291], 123
+// CHECK: vminmaxbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291], 123
 // CHECK: encoding: [0x62,0xc3,0x47,0x27,0x52,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
-          vminmaxnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291], 123
+          vminmaxbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291], 123
 
-// CHECK: vminmaxnepbf16 ymm22, ymm23, word ptr [rip]{1to16}, 123
+// CHECK: vminmaxbf16 ymm22, ymm23, word ptr [rip]{1to16}, 123
 // CHECK: encoding: [0x62,0xe3,0x47,0x30,0x52,0x35,0x00,0x00,0x00,0x00,0x7b]
-          vminmaxnepbf16 ymm22, ymm23, word ptr [rip]{1to16}, 123
+          vminmaxbf16 ymm22, ymm23, word ptr [rip]{1to16}, 123
 
-// CHECK: vminmaxnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024], 123
+// CHECK: vminmaxbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024], 123
 // CHECK: encoding: [0x62,0xe3,0x47,0x20,0x52,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b]
-          vminmaxnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024], 123
+          vminmaxbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024], 123
 
-// CHECK: vminmaxnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064], 123
+// CHECK: vminmaxbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064], 123
 // CHECK: encoding: [0x62,0xe3,0x47,0xa7,0x52,0x71,0x7f,0x7b]
-          vminmaxnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064], 123
+          vminmaxbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064], 123
 
-// CHECK: vminmaxnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}, 123
+// CHECK: vminmaxbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}, 123
 // CHECK: encoding: [0x62,0xe3,0x47,0xb7,0x52,0x72,0x80,0x7b]
-          vminmaxnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}, 123
+          vminmaxbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}, 123
 
-// CHECK: vminmaxnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456], 123
+// CHECK: vminmaxbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456], 123
 // CHECK: encoding: [0x62,0xa3,0x47,0x00,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
-          vminmaxnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456], 123
+          vminmaxbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456], 123
 
-// CHECK: vminmaxnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291], 123
+// CHECK: vminmaxbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291], 123
 // CHECK: encoding: [0x62,0xc3,0x47,0x07,0x52,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
-          vminmaxnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291], 123
+          vminmaxbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291], 123
 
-// CHECK: vminmaxnepbf16 xmm22, xmm23, word ptr [rip]{1to8}, 123
+// CHECK: vminmaxbf16 xmm22, xmm23, word ptr [rip]{1to8}, 123
 // CHECK: encoding: [0x62,0xe3,0x47,0x10,0x52,0x35,0x00,0x00,0x00,0x00,0x7b]
-          vminmaxnepbf16 xmm22, xmm23, word ptr [rip]{1to8}, 123
+          vminmaxbf16 xmm22, xmm23, word ptr [rip]{1to8}, 123
 
-// CHECK: vminmaxnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512], 123
+// CHECK: vminmaxbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512], 123
 // CHECK: encoding: [0x62,0xe3,0x47,0x00,0x52,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b]
-          vminmaxnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512], 123
+          vminmaxbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512], 123
 
-// CHECK: vminmaxnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032], 123
+// CHECK: vminmaxbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032], 123
 // CHECK: encoding: [0x62,0xe3,0x47,0x87,0x52,0x71,0x7f,0x7b]
-          vminmaxnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032], 123
+          vminmaxbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032], 123
 
-// CHECK: vminmaxnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}, 123
+// CHECK: vminmaxbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}, 123
 // CHECK: encoding: [0x62,0xe3,0x47,0x97,0x52,0x72,0x80,0x7b]
-          vminmaxnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}, 123
+          vminmaxbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}, 123
 
-// CHECK: vminmaxnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456], 123
+// CHECK: vminmaxbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456], 123
 // CHECK: encoding: [0x62,0xa3,0x47,0x40,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
-          vminmaxnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456], 123
+          vminmaxbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456], 123
 
-// CHECK: vminmaxnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291], 123
+// CHECK: vminmaxbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291], 123
 // CHECK: encoding: [0x62,0xc3,0x47,0x47,0x52,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
-          vminmaxnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291], 123
+          vminmaxbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291], 123
 
-// CHECK: vminmaxnepbf16 zmm22, zmm23, word ptr [rip]{1to32}, 123
+// CHECK: vminmaxbf16 zmm22, zmm23, word ptr [rip]{1to32}, 123
 // CHECK: encoding: [0x62,0xe3,0x47,0x50,0x52,0x35,0x00,0x00,0x00,0x00,0x7b]
-          vminmaxnepbf16 zmm22, zmm23, word ptr [rip]{1to32}, 123
+          vminmaxbf16 zmm22, zmm23, word ptr [rip]{1to32}, 123
 
-// CHECK: vminmaxnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048], 123
+// CHECK: vminmaxbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048], 123
 // CHECK: encoding: [0x62,0xe3,0x47,0x40,0x52,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b]
-          vminmaxnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048], 123
+          vminmaxbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048], 123
 
-// CHECK: vminmaxnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128], 123
+// CHECK: vminmaxbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128], 123
 // CHECK: encoding: [0x62,0xe3,0x47,0xc7,0x52,0x71,0x7f,0x7b]
-          vminmaxnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128], 123
+          vminmaxbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128], 123
 
-// CHECK: vminmaxnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}, 123
+// CHECK: vminmaxbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}, 123
 // CHECK: encoding: [0x62,0xe3,0x47,0xd7,0x52,0x72,0x80,0x7b]
-          vminmaxnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}, 123
+          vminmaxbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}, 123
 
 // CHECK: vminmaxpd xmm22, xmm23, xmm24, 123
 // CHECK: encoding: [0x62,0x83,0xc5,0x00,0x52,0xf0,0x7b]
diff --git a/llvm/test/TableGen/SDNodeInfoEmitter/ambiguous-constraints.td b/llvm/test/TableGen/SDNodeInfoEmitter/ambiguous-constraints.td
new file mode 100644
index 0000000000000..668464190e6d8
--- /dev/null
+++ b/llvm/test/TableGen/SDNodeInfoEmitter/ambiguous-constraints.td
@@ -0,0 +1,73 @@
+// RUN: split-file %s %t
+
+//--- test1.td
+// RUN: llvm-tblgen -gen-sd-node-info -I %p/../../../include %t/test1.td | FileCheck %t/test1.td
+
+include "llvm/Target/Target.td"
+
+def MyTarget : Target;
+
+def my_node_a : SDNode<"MyTargetISD::NODE", SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>>;
+def my_node_b : SDNode<"MyTargetISD::NODE", SDTypeProfile<1, 0, [SDTCisVT<0, f32>]>>;
+
+// CHECK:       enum GenNodeType : unsigned {
+// CHECK-NEXT:    NODE = ISD::BUILTIN_OP_END,
+// CHECK-NEXT:  };
+
+// CHECK:       static const char MyTargetSDNodeNames[] =
+// CHECK-NEXT:    "MyTargetISD::NODE\0"
+// CHECK-NEXT:    "\0";
+
+// CHECK:       static const SDTypeConstraint MyTargetSDTypeConstraints[] = {
+// CHECK-NEXT:    /* dummy */ {SDTCisVT, 0, 0, MVT::INVALID_SIMPLE_VALUE_TYPE}
+// CHECK-NEXT:  };
+// CHECK-EMPTY:
+// CHECK-NEXT:  static const SDNodeDesc MyTargetSDNodeDescs[] = {
+// CHECK-NEXT:      {1, 0, 0, 0, 0, 0, 0, 0}, // NODE
+// CHECK-NEXT:  };
+// CHECK-EMPTY:
+// CHECK-NEXT:  static const SDNodeInfo MyTargetGenSDNodeInfo(
+// CHECK-NEXT:      /*NumOpcodes=*/1, MyTargetSDNodeDescs,
+// CHECK-NEXT:      MyTargetSDNodeNames, MyTargetSDTypeConstraints);
+
+
+//--- test2.td
+// RUN: llvm-tblgen -gen-sd-node-info -I %p/../../../include %t/test2.td | FileCheck %t/test2.td
+
+include "llvm/Target/Target.td"
+
+def MyTarget : Target;
+
+def my_node_1a : SDNode<"MyTargetISD::NODE_1", SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>>;
+def my_node_1b : SDNode<"MyTargetISD::NODE_1", SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>>;
+def my_node_2a : SDNode<"MyTargetISD::NODE_2", SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>>;
+def my_node_2b : SDNode<"MyTargetISD::NODE_2", SDTypeProfile<1, 0, [SDTCisVT<0, untyped>]>>;
+
+// CHECK:       namespace llvm::MyTargetISD {
+// CHECK-EMPTY:
+// CHECK-NEXT:  enum GenNodeType : unsigned {
+// CHECK-NEXT:    NODE_1 = ISD::BUILTIN_OP_END,
+// CHECK-NEXT:    NODE_2,
+// CHECK-NEXT:  };
+// CHECK-EMPTY:
+// CHECK-NEXT:  static constexpr unsigned GENERATED_OPCODE_END = NODE_2 + 1;
+// CHECK-EMPTY:
+// CHECK-NEXT:  } // namespace llvm::MyTargetISD
+
+// CHECK:       static const char MyTargetSDNodeNames[] =
+// CHECK-NEXT:    "MyTargetISD::NODE_1\0"
+// CHECK-NEXT:    "MyTargetISD::NODE_2\0"
+// CHECK-NEXT:    "\0";
+
+// CHECK:       static const SDTypeConstraint MyTargetSDTypeConstraints[] = {
+// CHECK-NEXT:    /* 0 */ {SDTCisVT, 0, 0, MVT::i32},
+// CHECK-NEXT:  };
+// CHECK-EMPTY:
+// CHECK-NEXT:  static const SDNodeDesc MyTargetSDNodeDescs[] = {
+// CHECK-NEXT:      {1, 0, 0, 0, 0, 0, 0, 1}, // NODE_1
+// CHECK-NEXT:      {1, 0, 0, 0, 0, 20, 0, 0}, // NODE_2
+// CHECK-NEXT:  };
+// CHECK-EMPTY:
+// CHECK-NEXT:  static const SDNodeInfo MyTargetGenSDNodeInfo(
+// CHECK-NEXT:      /*NumOpcodes=*/2, MyTargetSDNodeDescs,
+// CHECK-NEXT:      MyTargetSDNodeNames, MyTargetSDTypeConstraints);
diff --git a/llvm/test/TableGen/SDNodeInfoEmitter/basic.td b/llvm/test/TableGen/SDNodeInfoEmitter/basic.td
new file mode 100644
index 0000000000000..5332b4f458dfd
--- /dev/null
+++ b/llvm/test/TableGen/SDNodeInfoEmitter/basic.td
@@ -0,0 +1,183 @@
+// RUN: split-file %s %t
+
+//--- no-nodes.td
+// RUN: llvm-tblgen -gen-sd-node-info -I %p/../../../include %t/no-nodes.td \
+// RUN:   | FileCheck %t/no-nodes.td
+
+include "llvm/Target/Target.td"
+
+def MyTarget : Target;
+
+// CHECK:       #ifdef GET_SDNODE_ENUM
+// CHECK-NEXT:  #undef GET_SDNODE_ENUM
+// CHECK-EMPTY:
+// CHECK-NEXT:  namespace llvm::MyTargetISD {
+// CHECK-EMPTY:
+// CHECK-NEXT:  static constexpr unsigned GENERATED_OPCODE_END = ISD::BUILTIN_OP_END;
+// CHECK-EMPTY:
+// CHECK-NEXT:  } // namespace llvm::MyTargetISD
+// CHECK-EMPTY:
+// CHECK-NEXT:  #endif // GET_SDNODE_ENUM
+// CHECK-EMPTY:
+// CHECK-NEXT:  #ifdef GET_SDNODE_DESC
+// CHECK-NEXT:  #undef GET_SDNODE_DESC
+// CHECK-EMPTY:
+// CHECK-NEXT:  namespace llvm {
+// CHECK-EMPTY:
+// CHECK-NEXT:  #ifdef __GNUC__
+// CHECK-NEXT:  #pragma GCC diagnostic push
+// CHECK-NEXT:  #pragma GCC diagnostic ignored "-Woverlength-strings"
+// CHECK-NEXT:  #endif
+// CHECK-NEXT:  static const char MyTargetSDNodeNames[] =
+// CHECK-NEXT:    "\0";
+// CHECK-NEXT:  #ifdef __GNUC__
+// CHECK-NEXT:  #pragma GCC diagnostic pop
+// CHECK-NEXT:  #endif
+// CHECK-EMPTY:
+// CHECK-NEXT:  static const SDTypeConstraint MyTargetSDTypeConstraints[] = {
+// CHECK-NEXT:    /* dummy */ {SDTCisVT, 0, 0, MVT::INVALID_SIMPLE_VALUE_TYPE}
+// CHECK-NEXT:  };
+// CHECK-EMPTY:
+// CHECK-NEXT:  static const SDNodeDesc MyTargetSDNodeDescs[] = {
+// CHECK-NEXT:  };
+// CHECK-EMPTY:
+// CHECK-NEXT:  static const SDNodeInfo MyTargetGenSDNodeInfo(
+// CHECK-NEXT:      /*NumOpcodes=*/0, MyTargetSDNodeDescs,
+// CHECK-NEXT:      MyTargetSDNodeNames, MyTargetSDTypeConstraints);
+// CHECK-EMPTY:
+// CHECK-NEXT:  } // namespace llvm
+// CHECK-EMPTY:
+// CHECK-NEXT:  #endif // GET_SDNODE_DESC
+
+
+//--- trivial-node.td
+// RUN: llvm-tblgen -gen-sd-node-info -I %p/../../../include %t/trivial-node.td \
+// RUN:   | FileCheck %t/trivial-node.td
+
+include "llvm/Target/Target.td"
+
+def MyTarget : Target;
+
+def my_noop : SDNode<"MyTargetISD::NOOP", SDTypeProfile<0, 0, []>>;
+
+// CHECK:       namespace llvm::MyTargetISD {
+// CHECK-EMPTY:
+// CHECK-NEXT:  enum GenNodeType : unsigned {
+// CHECK-NEXT:    NOOP = ISD::BUILTIN_OP_END,
+// CHECK-NEXT:  };
+// CHECK-EMPTY:
+// CHECK-NEXT:  static constexpr unsigned GENERATED_OPCODE_END = NOOP + 1;
+// CHECK-EMPTY:
+// CHECK-NEXT:  } // namespace llvm::MyTargetISD
+
+// CHECK:       static const char MyTargetSDNodeNames[] =
+// CHECK-NEXT:    "MyTargetISD::NOOP\0"
+// CHECK-NEXT:    "\0";
+
+// CHECK:       static const SDTypeConstraint MyTargetSDTypeConstraints[] = {
+// CHECK-NEXT:    /* dummy */ {SDTCisVT, 0, 0, MVT::INVALID_SIMPLE_VALUE_TYPE}
+// CHECK-NEXT:  };
+// CHECK-EMPTY:
+// CHECK-NEXT:  static const SDNodeDesc MyTargetSDNodeDescs[] = {
+// CHECK-NEXT:      {0, 0, 0, 0, 0, 0, 0, 0}, // NOOP
+// CHECK-NEXT:  };
+// CHECK-EMPTY:
+// CHECK-NEXT:  static const SDNodeInfo MyTargetGenSDNodeInfo(
+// CHECK-NEXT:      /*NumOpcodes=*/1, MyTargetSDNodeDescs,
+// CHECK-NEXT:      MyTargetSDNodeNames, MyTargetSDTypeConstraints);
+
+//--- advanced.td
+// RUN: llvm-tblgen -gen-sd-node-info -I %p/../../../include %t/advanced.td \
+// RUN:   | FileCheck %t/advanced.td
+
+include "llvm/Target/Target.td"
+
+def MyTarget : Target;
+
+def my_node_1 : SDNode<
+    "MyTargetISD::NODE_1",
+    SDTypeProfile<1, 1, [SDTCisVT<0, i1>, SDTCisVT<1, i2>]>,
+    [SDNPHasChain]
+>;
+
+let TSFlags = 42 in
+def my_node_2 : SDNode<
+    "MyTargetISD::NODE_2",
+    SDTypeProfile<3, 1, [
+        // Prefix of my_node_3 constraints.
+        SDTCisVT<0, i1>,
+        SDTCisPtrTy<1>,
+        SDTCisInt<2>,
+        SDTCisFP<3>,
+    ]>,
+    [SDNPMayStore, SDNPMayLoad, SDNPSideEffect,
+     SDNPMemOperand, SDNPVariadic]
+>;
+
+let IsStrictFP = true, TSFlags = 24 in
+def my_node_3 : SDNode<
+    "MyTargetISD::NODE_3",
+    SDTypeProfile<2, -1, [
+        SDTCisVT<0, i1>,
+        SDTCisPtrTy<1>,
+        SDTCisInt<2>,
+        SDTCisFP<3>,
+        SDTCisVec<4>,
+        SDTCisSameAs<6, 5>,
+        SDTCisVTSmallerThanOp<8, 7>,
+        SDTCisOpSmallerThanOp<10, 9>,
+        SDTCisEltOfVec<12, 11>,
+        SDTCisSubVecOfVec<14, 13>,
+        SDTCVecEltisVT<15, i32>,
+        SDTCisSameNumEltsAs<17, 16>,
+        SDTCisSameSizeAs<19, 18>,
+    ]>,
+    [SDNPCommutative, SDNPAssociative, SDNPHasChain,
+     SDNPOutGlue, SDNPInGlue, SDNPOptInGlue]
+>;
+
+// CHECK:       namespace llvm::MyTargetISD {
+// CHECK-EMPTY:
+// CHECK-NEXT:  enum GenNodeType : unsigned {
+// CHECK-NEXT:    NODE_1 = ISD::BUILTIN_OP_END,
+// CHECK-NEXT:    NODE_2,
+// CHECK-NEXT:    NODE_3,
+// CHECK-NEXT:  };
+// CHECK-EMPTY:
+// CHECK-NEXT:  static constexpr unsigned GENERATED_OPCODE_END = NODE_3 + 1;
+// CHECK-EMPTY:
+// CHECK-NEXT:  } // namespace llvm::MyTargetISD
+
+// CHECK:       static const char MyTargetSDNodeNames[] =
+// CHECK-NEXT:    "MyTargetISD::NODE_1\0"
+// CHECK-NEXT:    "MyTargetISD::NODE_2\0"
+// CHECK-NEXT:    "MyTargetISD::NODE_3\0"
+// CHECK-NEXT:    "\0";
+
+// CHECK:       static const SDTypeConstraint MyTargetSDTypeConstraints[] = {
+// CHECK-NEXT:    /* 0 */ {SDTCisVT, 1, 0, MVT::i2},
+// CHECK-SAME:            {SDTCisVT, 0, 0, MVT::i1},
+// CHECK-NEXT:    /* 2 */ {SDTCisSameSizeAs, 19, 18, MVT::INVALID_SIMPLE_VALUE_TYPE},
+// CHECK-SAME:            {SDTCisSameNumEltsAs, 17, 16, MVT::INVALID_SIMPLE_VALUE_TYPE},
+// CHECK-SAME:            {SDTCVecEltisVT, 15, 0, MVT::i32},
+// CHECK-SAME:            {SDTCisSubVecOfVec, 14, 13, MVT::INVALID_SIMPLE_VALUE_TYPE},
+// CHECK-SAME:            {SDTCisEltOfVec, 12, 11, MVT::INVALID_SIMPLE_VALUE_TYPE},
+// CHECK-SAME:            {SDTCisOpSmallerThanOp, 10, 9, MVT::INVALID_SIMPLE_VALUE_TYPE},
+// CHECK-SAME:            {SDTCisVTSmallerThanOp, 8, 7, MVT::INVALID_SIMPLE_VALUE_TYPE},
+// CHECK-SAME:            {SDTCisSameAs, 6, 5, MVT::INVALID_SIMPLE_VALUE_TYPE},
+// CHECK-SAME:            {SDTCisVec, 4, 0, MVT::INVALID_SIMPLE_VALUE_TYPE},
+// CHECK-SAME:            {SDTCisFP, 3, 0, MVT::INVALID_SIMPLE_VALUE_TYPE},
+// CHECK-SAME:            {SDTCisInt, 2, 0, MVT::INVALID_SIMPLE_VALUE_TYPE},
+// CHECK-SAME:            {SDTCisPtrTy, 1, 0, MVT::INVALID_SIMPLE_VALUE_TYPE},
+// CHECK-SAME:            {SDTCisVT, 0, 0, MVT::i1},
+// CHECK-NEXT:  };
+// CHECK-EMPTY:
+// CHECK-NEXT:  static const SDNodeDesc MyTargetSDNodeDescs[] = {
+// CHECK-NEXT:          {1, 1, 0|1<<SDNPHasChain, 0, 0, 0, 0, 2}, // NODE_1
+// CHECK-NEXT:          {3, 1, 0|1<<SDNPVariadic|1<<SDNPMemOperand, 0, 42, 20, 11, 4}, // NODE_2
+// CHECK-NEXT:          {2, -1, 0|1<<SDNPHasChain|1<<SDNPOutGlue|1<<SDNPInGlue|1<<SDNPOptInGlue, 0|1<<SDNFIsStrictFP, 24, 40, 2, 13}, // NODE_3
+// CHECK-NEXT:  };
+// CHECK-EMPTY:
+// CHECK-NEXT: static const SDNodeInfo MyTargetGenSDNodeInfo(
+// CHECK-NEXT:     /*NumOpcodes=*/3, MyTargetSDNodeDescs,
+// CHECK-NEXT:     MyTargetSDNodeNames, MyTargetSDTypeConstraints);
diff --git a/llvm/test/TableGen/SDNodeInfoEmitter/namespace.td b/llvm/test/TableGen/SDNodeInfoEmitter/namespace.td
new file mode 100644
index 0000000000000..844c12bd182fc
--- /dev/null
+++ b/llvm/test/TableGen/SDNodeInfoEmitter/namespace.td
@@ -0,0 +1,62 @@
+// RUN: llvm-tblgen -gen-sd-node-info -I %p/../../../include %s -sdnode-namespace=EmptyISD \
+// RUN:   | FileCheck %s -check-prefix=EMPTY
+
+// RUN: llvm-tblgen -gen-sd-node-info -I %p/../../../include %s \
+// RUN:   | FileCheck %s --check-prefixes=COMMON,TARGET -DNS=MyTargetISD
+// RUN: llvm-tblgen -gen-sd-node-info -I %p/../../../include %s -sdnode-namespace=MyCustomISD \
+// RUN:   | FileCheck %s -check-prefixes=COMMON,CUSTOM -DNS=MyCustomISD
+
+include "llvm/Target/Target.td"
+
+def MyTarget : Target;
+
+def node_1 : SDNode<"MyTargetISD::NODE", SDTypeProfile<1, 0, [SDTCisVT<0, i1>]>>;
+def node_2 : SDNode<"MyCustomISD::NODE", SDTypeProfile<0, 1, [SDTCisVT<0, i2>]>>;
+
+// EMPTY:        namespace llvm::EmptyISD {
+// EMPTY-EMPTY:
+// EMPTY-NEXT:   static constexpr unsigned GENERATED_OPCODE_END = ISD::BUILTIN_OP_END;
+// EMPTY-EMPTY:
+// EMPTY-NEXT:   } // namespace llvm::EmptyISD
+
+// EMPTY:        static const char MyTargetSDNodeNames[] =
+// EMPTY-NEXT:     "\0";
+
+// EMPTY:        static const SDTypeConstraint MyTargetSDTypeConstraints[] = {
+// EMPTY-NEXT:     /* dummy */ {SDTCisVT, 0, 0, MVT::INVALID_SIMPLE_VALUE_TYPE}
+// EMPTY-NEXT:   };
+// EMPTY-EMPTY:
+// EMPTY-NEXT:   static const SDNodeDesc MyTargetSDNodeDescs[] = {
+// EMPTY-NEXT:   };
+// EMPTY-EMPTY:
+// EMPTY-NEXT:   static const SDNodeInfo MyTargetGenSDNodeInfo(
+// EMPTY-NEXT:       /*NumOpcodes=*/0, MyTargetSDNodeDescs,
+// EMPTY-NEXT:       MyTargetSDNodeNames, MyTargetSDTypeConstraints);
+
+// COMMON:       namespace llvm::[[NS]] {
+// COMMON-EMPTY:
+// COMMON-NEXT:  enum GenNodeType : unsigned {
+// COMMON-NEXT:    NODE = ISD::BUILTIN_OP_END,
+// COMMON-NEXT:  };
+// COMMON-EMPTY:
+// COMMON-NEXT:  static constexpr unsigned GENERATED_OPCODE_END = NODE + 1;
+// COMMON-EMPTY:
+// COMMON-NEXT:  } // namespace llvm::[[NS]]
+
+// COMMON:       static const char MyTargetSDNodeNames[] =
+// COMMON-NEXT:    "[[NS]]::NODE\0"
+// COMMON-NEXT:    "\0";
+
+// COMMON:       static const SDTypeConstraint MyTargetSDTypeConstraints[] = {
+// TARGET-NEXT:    /* 0 */ {SDTCisVT, 0, 0, MVT::i1},
+// CUSTOM-NEXT:    /* 0 */ {SDTCisVT, 0, 0, MVT::i2},
+// COMMON-NEXT:  };
+// COMMON-EMPTY:
+// COMMON-NEXT:  static const SDNodeDesc MyTargetSDNodeDescs[] = {
+// TARGET-NEXT:      {1, 0, 0, 0, 0, 0, 0, 1}, // NODE
+// CUSTOM-NEXT:      {0, 1, 0, 0, 0, 0, 0, 1}, // NODE
+// COMMON-NEXT:  };
+// COMMON-EMPTY:
+// COMMON-NEXT:  static const SDNodeInfo MyTargetGenSDNodeInfo(
+// COMMON-NEXT:      /*NumOpcodes=*/1, MyTargetSDNodeDescs,
+// COMMON-NEXT:      MyTargetSDNodeNames, MyTargetSDTypeConstraints);
diff --git a/llvm/test/TableGen/SDNodeInfoEmitter/skipped-nodes.td b/llvm/test/TableGen/SDNodeInfoEmitter/skipped-nodes.td
new file mode 100644
index 0000000000000..ed278f262ca8f
--- /dev/null
+++ b/llvm/test/TableGen/SDNodeInfoEmitter/skipped-nodes.td
@@ -0,0 +1,91 @@
+// RUN: llvm-tblgen -gen-sd-node-info -I %p/../../../include %s 2> %t.warn | FileCheck %s
+// RUN: FileCheck --check-prefix=WARN --implicit-check-not=warning %s < %t.warn
+
+// RUN: llvm-tblgen -gen-sd-node-info -warn-on-skipped-nodes=false \
+// RUN:   -I %p/../../../include %s 2> %t.nowarn | FileCheck %s
+// RUN: not test -s %t.nowarn
+
+include "llvm/Target/Target.td"
+
+def MyTarget : Target;
+
+// WARN: [[#@LINE+1]]:5: warning: skipped node: invalid enum name
+def bad_name_1 : SDNode<"", SDTypeProfile<0, 0, []>>;
+
+// WARN: [[#@LINE+1]]:5: warning: skipped node: invalid enum name
+def bad_name_2 : SDNode<"NODE", SDTypeProfile<0, 0, []>>;
+
+// WARN: [[#@LINE+1]]:5: warning: skipped node: invalid enum name
+def bad_name_3 : SDNode<"MyTargetISD::", SDTypeProfile<0, 0, []>>;
+
+// WARN: [[#@LINE+1]]:5: warning: skipped node: invalid enum name
+def bad_name_4 : SDNode<"MyISD::", SDTypeProfile<0, 0, []>>;
+
+// WARN: [[#@LINE+1]]:5: warning: skipped node: invalid enum name
+def bad_name_5 : SDNode<"::NODE", SDTypeProfile<0, 0, []>>;
+
+
+// Standard namespace: skipped silently (no warning is expected).
+def silent_1 : SDNode<"ISD::SILENT", SDTypeProfile<0, 0, []>>;
+
+// Different namespace: skipped silently (no warning is expected).
+def silent_2 : SDNode<"MyISD::SILENT", SDTypeProfile<0, 0, []>>;
+
+
+// Different number of results.
+// WARN: [[#@LINE+2]]:5: warning: skipped node: incompatible description
+// WARN: [[#@LINE+2]]:5: warning: skipped node: incompatible description
+def node_1a : SDNode<"MyTargetISD::NODE_1", SDTypeProfile<0, 0, []>>;
+def node_1b : SDNode<"MyTargetISD::NODE_1", SDTypeProfile<1, 0, []>>;
+
+// Different number of operands.
+// WARN: [[#@LINE+2]]:5: warning: skipped node: incompatible description
+// WARN: [[#@LINE+2]]:5: warning: skipped node: incompatible description
+def node_2a : SDNode<"MyTargetISD::NODE_2", SDTypeProfile<0, 0, []>>;
+def node_2b : SDNode<"MyTargetISD::NODE_2", SDTypeProfile<0, 1, []>>;
+
+// Different value of IsStrictFP.
+// WARN: [[#@LINE+3]]:5: warning: skipped node: incompatible description
+// WARN: [[#@LINE+3]]:5: warning: skipped node: incompatible description
+let IsStrictFP = true in
+def node_3a : SDNode<"MyTargetISD::NODE_3", SDTypeProfile<0, 0, []>>;
+def node_3b : SDNode<"MyTargetISD::NODE_3", SDTypeProfile<0, 0, []>>;
+
+// Different value of TSFlags.
+// WARN: [[#@LINE+3]]:5: warning: skipped node: incompatible description
+// WARN: [[#@LINE+3]]:5: warning: skipped node: incompatible description
+let TSFlags = 1 in
+def node_4a : SDNode<"MyTargetISD::NODE_4", SDTypeProfile<0, 0, []>>;
+def node_4b : SDNode<"MyTargetISD::NODE_4", SDTypeProfile<0, 0, []>>;
+
+// Different properties.
+// WARN: [[#@LINE+2]]:5: warning: skipped node: incompatible description
+// WARN: [[#@LINE+2]]:5: warning: skipped node: incompatible description
+def node_5a : SDNode<"MyTargetISD::NODE_5", SDTypeProfile<0, 0, []>>;
+def node_5b : SDNode<"MyTargetISD::NODE_5", SDTypeProfile<0, 0, []>, [SDNPHasChain]>;
+
+
+// CHECK:       enum GenNodeType : unsigned {
+// CHECK-NEXT:    COMPAT = ISD::BUILTIN_OP_END,
+// CHECK-NEXT:  };
+
+// CHECK:       static const char MyTargetSDNodeNames[] =
+// CHECK-NEXT:    "MyTargetISD::COMPAT\0"
+// CHECK-NEXT:    "\0";
+
+// CHECK:       static const SDTypeConstraint MyTargetSDTypeConstraints[] = {
+// CHECK-NEXT:    /* dummy */ {SDTCisVT, 0, 0, MVT::INVALID_SIMPLE_VALUE_TYPE}
+// CHECK-NEXT:  };
+// CHECK-EMPTY:
+// CHECK-NEXT:  static const SDNodeDesc MyTargetSDNodeDescs[] = {
+// CHECK-NEXT:      {1, -1, 0, 0, 0, 0, 0, 0}, // COMPAT
+// CHECK-NEXT:  };
+// CHECK-EMPTY:
+// CHECK-NEXT:  static const SDNodeInfo MyTargetGenSDNodeInfo(
+// CHECK-NEXT:      /*NumOpcodes=*/1, MyTargetSDNodeDescs,
+// CHECK-NEXT:      MyTargetSDNodeNames, MyTargetSDTypeConstraints);
+
+def compat_a : SDNode<"MyTargetISD::COMPAT", SDTypeProfile<1, -1, []>>;
+def compat_b : SDNode<"MyTargetISD::COMPAT", SDTypeProfile<1, -1, [SDTCisVT<0, untyped>]>>;
+def compat_c : SDNode<"MyTargetISD::COMPAT", SDTypeProfile<1, -1, [SDTCisVT<0, untyped>]>,
+    [SDNPCommutative, SDNPAssociative, SDNPMayStore, SDNPMayLoad, SDNPSideEffect]>;
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index 954c05bdb2076..cc49ed9b94d55 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -3073,9 +3073,9 @@ static const X86FoldTableEntry Table2[] = {
   {X86::VMINCSHZrr, X86::VMINCSHZrm, 0},
   {X86::VMINCSSZrr, X86::VMINCSSZrm, 0},
   {X86::VMINCSSrr, X86::VMINCSSrm, 0},
-  {X86::VMINMAXNEPBF16Z128rri, X86::VMINMAXNEPBF16Z128rmi, 0},
-  {X86::VMINMAXNEPBF16Z256rri, X86::VMINMAXNEPBF16Z256rmi, 0},
-  {X86::VMINMAXNEPBF16Zrri, X86::VMINMAXNEPBF16Zrmi, 0},
+  {X86::VMINMAXBF16Z128rri, X86::VMINMAXBF16Z128rmi, 0},
+  {X86::VMINMAXBF16Z256rri, X86::VMINMAXBF16Z256rmi, 0},
+  {X86::VMINMAXBF16Zrri, X86::VMINMAXBF16Zrmi, 0},
   {X86::VMINMAXPDZ128rri, X86::VMINMAXPDZ128rmi, 0},
   {X86::VMINMAXPDZ256rri, X86::VMINMAXPDZ256rmi, 0},
   {X86::VMINMAXPDZrri, X86::VMINMAXPDZrmi, 0},
@@ -5122,9 +5122,9 @@ static const X86FoldTableEntry Table3[] = {
   {X86::VMINCPSZ128rrkz, X86::VMINCPSZ128rmkz, 0},
   {X86::VMINCPSZ256rrkz, X86::VMINCPSZ256rmkz, 0},
   {X86::VMINCPSZrrkz, X86::VMINCPSZrmkz, 0},
-  {X86::VMINMAXNEPBF16Z128rrikz, X86::VMINMAXNEPBF16Z128rmikz, 0},
-  {X86::VMINMAXNEPBF16Z256rrikz, X86::VMINMAXNEPBF16Z256rmikz, 0},
-  {X86::VMINMAXNEPBF16Zrrikz, X86::VMINMAXNEPBF16Zrmikz, 0},
+  {X86::VMINMAXBF16Z128rrikz, X86::VMINMAXBF16Z128rmikz, 0},
+  {X86::VMINMAXBF16Z256rrikz, X86::VMINMAXBF16Z256rmikz, 0},
+  {X86::VMINMAXBF16Zrrikz, X86::VMINMAXBF16Zrmikz, 0},
   {X86::VMINMAXPDZ128rrikz, X86::VMINMAXPDZ128rmikz, 0},
   {X86::VMINMAXPDZ256rrikz, X86::VMINMAXPDZ256rmikz, 0},
   {X86::VMINMAXPDZrrikz, X86::VMINMAXPDZrmikz, 0},
@@ -6744,9 +6744,9 @@ static const X86FoldTableEntry Table4[] = {
   {X86::VMINCPSZ128rrk, X86::VMINCPSZ128rmk, 0},
   {X86::VMINCPSZ256rrk, X86::VMINCPSZ256rmk, 0},
   {X86::VMINCPSZrrk, X86::VMINCPSZrmk, 0},
-  {X86::VMINMAXNEPBF16Z128rrik, X86::VMINMAXNEPBF16Z128rmik, 0},
-  {X86::VMINMAXNEPBF16Z256rrik, X86::VMINMAXNEPBF16Z256rmik, 0},
-  {X86::VMINMAXNEPBF16Zrrik, X86::VMINMAXNEPBF16Zrmik, 0},
+  {X86::VMINMAXBF16Z128rrik, X86::VMINMAXBF16Z128rmik, 0},
+  {X86::VMINMAXBF16Z256rrik, X86::VMINMAXBF16Z256rmik, 0},
+  {X86::VMINMAXBF16Zrrik, X86::VMINMAXBF16Zrmik, 0},
   {X86::VMINMAXPDZ128rrik, X86::VMINMAXPDZ128rmik, 0},
   {X86::VMINMAXPDZ256rrik, X86::VMINMAXPDZ256rmik, 0},
   {X86::VMINMAXPDZrrik, X86::VMINMAXPDZrmik, 0},
@@ -8203,9 +8203,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
   {X86::VMINCPSZ128rr, X86::VMINCPSZ128rmb, TB_BCAST_SS},
   {X86::VMINCPSZ256rr, X86::VMINCPSZ256rmb, TB_BCAST_SS},
   {X86::VMINCPSZrr, X86::VMINCPSZrmb, TB_BCAST_SS},
-  {X86::VMINMAXNEPBF16Z128rri, X86::VMINMAXNEPBF16Z128rmbi, TB_BCAST_SH},
-  {X86::VMINMAXNEPBF16Z256rri, X86::VMINMAXNEPBF16Z256rmbi, TB_BCAST_SH},
-  {X86::VMINMAXNEPBF16Zrri, X86::VMINMAXNEPBF16Zrmbi, TB_BCAST_SH},
+  {X86::VMINMAXBF16Z128rri, X86::VMINMAXBF16Z128rmbi, TB_BCAST_SH},
+  {X86::VMINMAXBF16Z256rri, X86::VMINMAXBF16Z256rmbi, TB_BCAST_SH},
+  {X86::VMINMAXBF16Zrri, X86::VMINMAXBF16Zrmbi, TB_BCAST_SH},
   {X86::VMINMAXPDZ128rri, X86::VMINMAXPDZ128rmbi, TB_BCAST_SD},
   {X86::VMINMAXPDZ256rri, X86::VMINMAXPDZ256rmbi, TB_BCAST_SD},
   {X86::VMINMAXPDZrri, X86::VMINMAXPDZrmbi, TB_BCAST_SD},
@@ -9231,9 +9231,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
   {X86::VMINCPSZ128rrkz, X86::VMINCPSZ128rmbkz, TB_BCAST_SS},
   {X86::VMINCPSZ256rrkz, X86::VMINCPSZ256rmbkz, TB_BCAST_SS},
   {X86::VMINCPSZrrkz, X86::VMINCPSZrmbkz, TB_BCAST_SS},
-  {X86::VMINMAXNEPBF16Z128rrikz, X86::VMINMAXNEPBF16Z128rmbikz, TB_BCAST_SH},
-  {X86::VMINMAXNEPBF16Z256rrikz, X86::VMINMAXNEPBF16Z256rmbikz, TB_BCAST_SH},
-  {X86::VMINMAXNEPBF16Zrrikz, X86::VMINMAXNEPBF16Zrmbikz, TB_BCAST_SH},
+  {X86::VMINMAXBF16Z128rrikz, X86::VMINMAXBF16Z128rmbikz, TB_BCAST_SH},
+  {X86::VMINMAXBF16Z256rrikz, X86::VMINMAXBF16Z256rmbikz, TB_BCAST_SH},
+  {X86::VMINMAXBF16Zrrikz, X86::VMINMAXBF16Zrmbikz, TB_BCAST_SH},
   {X86::VMINMAXPDZ128rrikz, X86::VMINMAXPDZ128rmbikz, TB_BCAST_SD},
   {X86::VMINMAXPDZ256rrikz, X86::VMINMAXPDZ256rmbikz, TB_BCAST_SD},
   {X86::VMINMAXPDZrrikz, X86::VMINMAXPDZrmbikz, TB_BCAST_SD},
@@ -10302,9 +10302,9 @@ static const X86FoldTableEntry BroadcastTable4[] = {
   {X86::VMINCPSZ128rrk, X86::VMINCPSZ128rmbk, TB_BCAST_SS},
   {X86::VMINCPSZ256rrk, X86::VMINCPSZ256rmbk, TB_BCAST_SS},
   {X86::VMINCPSZrrk, X86::VMINCPSZrmbk, TB_BCAST_SS},
-  {X86::VMINMAXNEPBF16Z128rrik, X86::VMINMAXNEPBF16Z128rmbik, TB_BCAST_SH},
-  {X86::VMINMAXNEPBF16Z256rrik, X86::VMINMAXNEPBF16Z256rmbik, TB_BCAST_SH},
-  {X86::VMINMAXNEPBF16Zrrik, X86::VMINMAXNEPBF16Zrmbik, TB_BCAST_SH},
+  {X86::VMINMAXBF16Z128rrik, X86::VMINMAXBF16Z128rmbik, TB_BCAST_SH},
+  {X86::VMINMAXBF16Z256rrik, X86::VMINMAXBF16Z256rmbik, TB_BCAST_SH},
+  {X86::VMINMAXBF16Zrrik, X86::VMINMAXBF16Zrmbik, TB_BCAST_SH},
   {X86::VMINMAXPDZ128rrik, X86::VMINMAXPDZ128rmbik, TB_BCAST_SD},
   {X86::VMINMAXPDZ256rrik, X86::VMINMAXPDZ256rmbik, TB_BCAST_SD},
   {X86::VMINMAXPDZrrik, X86::VMINMAXPDZrmbik, TB_BCAST_SD},
diff --git a/llvm/test/Transforms/HipStdPar/allocation-interposition.ll b/llvm/test/Transforms/HipStdPar/allocation-interposition.ll
index 291b06ed0ca9e..9ec284b1dedb7 100644
--- a/llvm/test/Transforms/HipStdPar/allocation-interposition.ll
+++ b/llvm/test/Transforms/HipStdPar/allocation-interposition.ll
@@ -14,6 +14,8 @@ declare i32 @__hipstdpar_posix_aligned_alloc(ptr, i64, i64)
 
 declare void @__hipstdpar_hidden_free(ptr)
 
+declare ptr @__hipstdpar_hidden_malloc(i64)
+
 declare ptr @__hipstdpar_realloc(ptr, i64)
 
 declare ptr @__hipstdpar_realloc_array(ptr, i64, i64)
diff --git a/llvm/test/Transforms/JumpThreading/thread-debug-info.ll b/llvm/test/Transforms/JumpThreading/thread-debug-info.ll
index 4727413b35a60..5a338593e5691 100644
--- a/llvm/test/Transforms/JumpThreading/thread-debug-info.ll
+++ b/llvm/test/Transforms/JumpThreading/thread-debug-info.ll
@@ -96,6 +96,8 @@ exit:                                             ; preds = %bb.f4, %bb.f3, %bb.
 ; being threaded, the `and` in the function below is optimised away, but its
 ; debug-info should still be preserved.
 ; Similarly, the call to f1 gets cloned, its dbg.value should be cloned too.
+; The duplicated debug value in land.end.thr_comm is removed by
+; RemoveRedundantDbgInstrs, which runs at the end.
 define void @test16(i1 %c, i1 %c2, i1 %c3, i1 %c4) nounwind ssp !dbg !30 {
 ; CHECK-LABEL: define void @test16(i1
 entry:
@@ -109,7 +111,6 @@ lor.lhs.false.i:
   br i1 %c3, label %land.end, label %land.end, !dbg !33
 
 ; CHECK-LABEL: land.end.thr_comm:
-; CHECK-NEXT:  #dbg_value(i32 0,
 ; CHECK-NEXT:  #dbg_value(i32 1,
 ; CHECK-NEXT:  call void @f1()
 ; CHECK-NEXT:  br i1 %c4,
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses-cost.ll
new file mode 100644
index 0000000000000..564fba65e6238
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses-cost.ll
@@ -0,0 +1,109 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -p loop-vectorize -mtriple=aarch64-none-linux-gnu -S %s | FileCheck %s
+
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
+
+; Test cases for modeling the cost of interleaved memory accesses.
+
+; Function Attrs: vscale_range(2,2)
+define void @test_masked_interleave(ptr noalias %A, ptr noalias %B, ptr noalias %C) #0 {
+; CHECK-LABEL: define void @test_masked_interleave(
+; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
+; CHECK:       [[LOOP_HEADER]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    [[IV_1:%.*]] = or disjoint i64 [[IV]], 1
+; CHECK-NEXT:    [[GEP_A_1:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV_1]]
+; CHECK-NEXT:    [[L_1:%.*]] = load i8, ptr [[GEP_A_1]], align 1
+; CHECK-NEXT:    [[C_1:%.*]] = icmp eq i8 [[L_1]], 0
+; CHECK-NEXT:    br i1 [[C_1]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[IV_2:%.*]] = or disjoint i64 [[IV]], 2
+; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV_2]]
+; CHECK-NEXT:    [[L_2:%.*]] = load i8, ptr [[ARRAYIDX7]], align 1
+; CHECK-NEXT:    [[CONV8:%.*]] = zext i8 [[L_2]] to i32
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[CONV8]], 2
+; CHECK-NEXT:    [[ADD9:%.*]] = or disjoint i64 [[IV]], 1
+; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr i8, ptr [[A]], i64 [[ADD9]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX10]], align 1
+; CHECK-NEXT:    [[CONV11:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT:    [[SHL12:%.*]] = shl i32 [[CONV11]], 2
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHL12]], [[SHL]]
+; CHECK-NEXT:    [[B2:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[B2]], align 1
+; CHECK-NEXT:    [[CONV15:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT:    [[OR16:%.*]] = or i32 [[OR]], [[CONV15]]
+; CHECK-NEXT:    [[SHL17:%.*]] = shl i32 [[OR16]], 2
+; CHECK-NEXT:    [[CONV19:%.*]] = trunc i64 [[IV]] to i32
+; CHECK-NEXT:    [[ADD20:%.*]] = or i32 3, [[CONV19]]
+; CHECK-NEXT:    [[DEST_0:%.*]] = or i32 [[SHL17]], [[ADD20]]
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[DEST_0]], 2
+; CHECK-NEXT:    [[SHR26:%.*]] = lshr i32 [[CONV8]], 2
+; CHECK-NEXT:    [[CONV27:%.*]] = trunc i32 [[SHR26]] to i8
+; CHECK-NEXT:    store i8 [[CONV27]], ptr [[ARRAYIDX7]], align 1
+; CHECK-NEXT:    [[SHR30:%.*]] = lshr i32 [[CONV8]], 5
+; CHECK-NEXT:    [[CONV31:%.*]] = trunc i32 [[SHR30]] to i8
+; CHECK-NEXT:    store i8 [[CONV31]], ptr [[C]], align 1
+; CHECK-NEXT:    [[CONV34:%.*]] = trunc i32 [[SHR]] to i8
+; CHECK-NEXT:    store i8 [[CONV34]], ptr [[B]], align 1
+; CHECK-NEXT:    br label %[[LOOP_LATCH]]
+; CHECK:       [[LOOP_LATCH]]:
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 4
+; CHECK-NEXT:    [[EC:%.*]] = icmp ugt i64 [[IV]], 1000
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
+  %iv.1 = or disjoint i64 %iv, 1
+  %gep.A.1 = getelementptr i8, ptr %A, i64 %iv.1
+  %l.1 = load i8, ptr %gep.A.1, align 1
+  %c.1 = icmp eq i8 %l.1, 0
+  br i1 %c.1, label %then, label %loop.latch
+
+then:
+  %iv.2 = or disjoint i64 %iv, 2
+  %arrayidx7 = getelementptr i8, ptr %A, i64 %iv.2
+  %l.2 = load i8, ptr %arrayidx7, align 1
+  %conv8 = zext i8 %l.2 to i32
+  %shl = shl i32 %conv8, 2
+  %add9 = or disjoint i64 %iv, 1
+  %arrayidx10 = getelementptr i8, ptr %A, i64 %add9
+  %2 = load i8, ptr %arrayidx10, align 1
+  %conv11 = zext i8 %2 to i32
+  %shl12 = shl i32 %conv11, 2
+  %or = or i32 %shl12, %shl
+  %B2 = getelementptr i8, ptr %A, i64 %iv
+  %3 = load i8, ptr %B2, align 1
+  %conv15 = zext i8 %3 to i32
+  %or16 = or i32 %or, %conv15
+  %shl17 = shl i32 %or16, 2
+  %conv19 = trunc i64 %iv to i32
+  %add20 = or i32 3, %conv19
+  %dest.0 = or i32 %shl17, %add20
+  %shr = lshr i32 %dest.0, 2
+  %shr26 = lshr i32 %conv8, 2
+  %conv27 = trunc i32 %shr26 to i8
+  store i8 %conv27, ptr %arrayidx7, align 1
+  %shr30 = lshr i32 %conv8, 5
+  %conv31 = trunc i32 %shr30 to i8
+  store i8 %conv31, ptr %C, align 1
+  %conv34 = trunc i32 %shr to i8
+  store i8 %conv34, ptr %B, align 1
+  br label %loop.latch
+
+loop.latch:
+  %iv.next = add i64 %iv, 4
+  %ec = icmp ugt i64 %iv, 1000
+  br i1 %ec, label %exit, label %loop.header
+
+exit:
+  ret void
+}
+
+attributes #0 = { vscale_range(2,2) "target-cpu"="neoverse-512tvb" }
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
index 05c0bc0761ea4..bf95622733461 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
@@ -396,8 +396,8 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n
 ; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP9]], align 4
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
-; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
 ; CHECK-NEXT:    [[REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP10]])
+; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
 ; CHECK-NEXT:    [[REVERSE1:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP11]])
 ; CHECK-NEXT:    [[TMP12:%.*]] = add nsw <vscale x 4 x i32> [[REVERSE]], [[VEC_IND]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = sub nsw <vscale x 4 x i32> [[REVERSE1]], [[VEC_IND]]
@@ -1548,263 +1548,5 @@ end:
   ret void
 }
 
-; Check vectorization on an interleaved load/store groups of factor 4
-
-; for (int i = 0; i < 1024; ++i) {
-;   dst[i].x = a[i].x + b[i].x;
-;   dst[i].y = a[i].y - b[i].y;
-;   dst[i].z = a[i].z << b[i].z;
-;   dst[i].t = a[i].t >> b[i].t;
-; }
-%struct.xyzt = type { i32, i32, i32, i32 }
-
-define void @interleave_deinterleave(ptr writeonly noalias %dst, ptr readonly %a, ptr readonly %b) {
-; CHECK-LABEL: @interleave_deinterleave(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 2
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ugt i64 [[TMP1]], 1024
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK:       vector.ph:
-; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 2
-; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
-; CHECK-NEXT:    [[N_VEC:%.*]] = sub nuw nsw i64 1024, [[N_MOD_VF]]
-; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 2
-; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
-; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_XYZT:%.*]], ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 16 x i32>, ptr [[TMP6]], align 4
-; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 8 x i32>, <vscale x 8 x i32> } @llvm.vector.deinterleave2.nxv16i32(<vscale x 16 x i32> [[WIDE_VEC]])
-; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } [[STRIDED_VEC]], 0
-; CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } [[STRIDED_VEC]], 1
-; CHECK-NEXT:    [[STRIDED_VEC6:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[TMP7]])
-; CHECK-NEXT:    [[STRIDED_VEC7:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[TMP8]])
-; CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC6]], 0
-; CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC7]], 0
-; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC6]], 1
-; CHECK-NEXT:    [[TMP12:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC7]], 1
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_XYZT]], ptr [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[WIDE_VEC8:%.*]] = load <vscale x 16 x i32>, ptr [[TMP13]], align 4
-; CHECK-NEXT:    [[STRIDED_VEC9:%.*]] = call { <vscale x 8 x i32>, <vscale x 8 x i32> } @llvm.vector.deinterleave2.nxv16i32(<vscale x 16 x i32> [[WIDE_VEC8]])
-; CHECK-NEXT:    [[TMP14:%.*]] = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } [[STRIDED_VEC9]], 0
-; CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } [[STRIDED_VEC9]], 1
-; CHECK-NEXT:    [[STRIDED_VEC10:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[TMP14]])
-; CHECK-NEXT:    [[STRIDED_VEC11:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[TMP15]])
-; CHECK-NEXT:    [[TMP16:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC10]], 0
-; CHECK-NEXT:    [[TMP17:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC11]], 0
-; CHECK-NEXT:    [[TMP18:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC10]], 1
-; CHECK-NEXT:    [[TMP19:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC11]], 1
-; CHECK-NEXT:    [[TMP20:%.*]] = add nsw <vscale x 4 x i32> [[TMP16]], [[TMP9]]
-; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_XYZT]], ptr [[DST:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP22:%.*]] = sub nsw <vscale x 4 x i32> [[TMP10]], [[TMP17]]
-; CHECK-NEXT:    [[TMP23:%.*]] = shl <vscale x 4 x i32> [[TMP11]], [[TMP18]]
-; CHECK-NEXT:    [[TMP24:%.*]] = ashr <vscale x 4 x i32> [[TMP12]], [[TMP19]]
-; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[TMP20]], <vscale x 4 x i32> [[TMP23]])
-; CHECK-NEXT:    [[INTERLEAVED_VEC12:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[TMP22]], <vscale x 4 x i32> [[TMP24]])
-; CHECK-NEXT:    [[INTERLEAVED_VEC13:%.*]] = call <vscale x 16 x i32> @llvm.vector.interleave2.nxv16i32(<vscale x 8 x i32> [[INTERLEAVED_VEC]], <vscale x 8 x i32> [[INTERLEAVED_VEC12]])
-; CHECK-NEXT:    store <vscale x 16 x i32> [[INTERLEAVED_VEC13]], ptr [[TMP21]], align 4
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
-; CHECK:       middle.block:
-; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
-; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
-; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.body:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [[STRUCT_XYZT]], ptr [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [[STRUCT_XYZT]], ptr [[B]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP26]]
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw [[STRUCT_XYZT]], ptr [[DST]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX5]], align 4
-; CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 4
-; CHECK-NEXT:    [[TMP28:%.*]] = load i32, ptr [[Y]], align 4
-; CHECK-NEXT:    [[Y11:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX2]], i64 4
-; CHECK-NEXT:    [[TMP29:%.*]] = load i32, ptr [[Y11]], align 4
-; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], [[TMP29]]
-; CHECK-NEXT:    [[Y14:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX5]], i64 4
-; CHECK-NEXT:    store i32 [[SUB]], ptr [[Y14]], align 4
-; CHECK-NEXT:    [[Z:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 8
-; CHECK-NEXT:    [[TMP30:%.*]] = load i32, ptr [[Z]], align 4
-; CHECK-NEXT:    [[Z19:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX2]], i64 8
-; CHECK-NEXT:    [[TMP31:%.*]] = load i32, ptr [[Z19]], align 4
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[TMP30]], [[TMP31]]
-; CHECK-NEXT:    [[Z22:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX5]], i64 8
-; CHECK-NEXT:    store i32 [[SHL]], ptr [[Z22]], align 4
-; CHECK-NEXT:    [[T:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 12
-; CHECK-NEXT:    [[TMP32:%.*]] = load i32, ptr [[T]], align 4
-; CHECK-NEXT:    [[T27:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX2]], i64 12
-; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[T27]], align 4
-; CHECK-NEXT:    [[SHR:%.*]] = ashr i32 [[TMP32]], [[TMP33]]
-; CHECK-NEXT:    [[T30:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX5]], i64 12
-; CHECK-NEXT:    store i32 [[SHR]], ptr [[T30]], align 4
-; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
-; CHECK:       for.end:
-; CHECK-NEXT:    ret void
-;
-entry:
-  br label %for.body
-
-for.body:
-  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds %struct.xyzt, ptr %a, i64 %indvars.iv
-  %0 = load i32, ptr %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds %struct.xyzt, ptr %b, i64 %indvars.iv
-  %1 = load i32, ptr %arrayidx2, align 4
-  %add = add nsw i32 %1, %0
-  %arrayidx5 = getelementptr inbounds %struct.xyzt, ptr %dst, i64 %indvars.iv
-  store i32 %add, ptr %arrayidx5, align 4
-  %y = getelementptr inbounds nuw i8, ptr %arrayidx, i64 4
-  %2 = load i32, ptr %y, align 4
-  %y11 = getelementptr inbounds nuw i8, ptr %arrayidx2, i64 4
-  %3 = load i32, ptr %y11, align 4
-  %sub = sub nsw i32 %2, %3
-  %y14 = getelementptr inbounds nuw i8, ptr %arrayidx5, i64 4
-  store i32 %sub, ptr %y14, align 4
-  %z = getelementptr inbounds nuw i8, ptr %arrayidx, i64 8
-  %4 = load i32, ptr %z, align 4
-  %z19 = getelementptr inbounds nuw i8, ptr %arrayidx2, i64 8
-  %5 = load i32, ptr %z19, align 4
-  %shl = shl i32 %4, %5
-  %z22 = getelementptr inbounds nuw i8, ptr %arrayidx5, i64 8
-  store i32 %shl, ptr %z22, align 4
-  %t = getelementptr inbounds nuw i8, ptr %arrayidx, i64 12
-  %6 = load i32, ptr %t, align 4
-  %t27 = getelementptr inbounds nuw i8, ptr %arrayidx2, i64 12
-  %7 = load i32, ptr %t27, align 4
-  %shr = ashr i32 %6, %7
-  %t30 = getelementptr inbounds nuw i8, ptr %arrayidx5, i64 12
-  store i32 %shr, ptr %t30, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond.not = icmp eq i64 %indvars.iv.next, 1024
-  br i1 %exitcond.not, label %for.end, label %for.body
-
-for.end:
-  ret void
-}
-
-; Check vectorization on a reverse interleaved load/store groups of factor 4
-
-; for (int i = 1023; i >= 0; i--) {
-;   int a = A[i].x + i;
-;   int b = A[i].y - i;
-;   int c = A[i].z * i;
-;   int d = A[i].t << i;
-;   B[i].x = a;
-;   B[i].y = b;
-;   B[i].z = c;
-;   B[i].t = d;
-; }
-
-define void @interleave_deinterleave_reverse(ptr noalias nocapture readonly %A, ptr noalias nocapture %B) #1{
-; CHECK-LABEL: @interleave_deinterleave_reverse(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK:       vector.ph:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
-; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
-; CHECK-NEXT:    [[INDUCTION:%.*]] = sub <vscale x 4 x i32> splat (i32 1023), [[TMP2]]
-; CHECK-NEXT:    [[TMP3:%.*]] = trunc nuw nsw i64 [[TMP1]] to i32
-; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw i32 0, [[TMP3]]
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP4]], i64 0
-; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
-; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_XYZT:%.*]], ptr [[A:%.*]], i64 [[OFFSET_IDX]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT:    [[TMP7:%.*]] = shl nuw nsw i32 [[TMP6]], 4
-; CHECK-NEXT:    [[TMP8:%.*]] = sub nsw i32 4, [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 [[TMP9]]
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 16 x i32>, ptr [[TMP10]], align 4
-; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 8 x i32>, <vscale x 8 x i32> } @llvm.vector.deinterleave2.nxv16i32(<vscale x 16 x i32> [[WIDE_VEC]])
-; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } [[STRIDED_VEC]], 0
-; CHECK-NEXT:    [[TMP12:%.*]] = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } [[STRIDED_VEC]], 1
-; CHECK-NEXT:    [[STRIDED_VEC1:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[TMP11]])
-; CHECK-NEXT:    [[STRIDED_VEC2:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[TMP12]])
-; CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC1]], 0
-; CHECK-NEXT:    [[TMP14:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC2]], 0
-; CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC1]], 1
-; CHECK-NEXT:    [[TMP16:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC2]], 1
-; CHECK-NEXT:    [[REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP13]])
-; CHECK-NEXT:    [[REVERSE3:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP14]])
-; CHECK-NEXT:    [[REVERSE4:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP15]])
-; CHECK-NEXT:    [[REVERSE5:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP16]])
-; CHECK-NEXT:    [[TMP17:%.*]] = add nsw <vscale x 4 x i32> [[REVERSE]], [[VEC_IND]]
-; CHECK-NEXT:    [[TMP18:%.*]] = sub nsw <vscale x 4 x i32> [[REVERSE3]], [[VEC_IND]]
-; CHECK-NEXT:    [[TMP19:%.*]] = mul nsw <vscale x 4 x i32> [[REVERSE4]], [[VEC_IND]]
-; CHECK-NEXT:    [[TMP20:%.*]] = shl nuw nsw <vscale x 4 x i32> [[REVERSE5]], [[VEC_IND]]
-; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_XYZT]], ptr [[B:%.*]], i64 [[OFFSET_IDX]], i32 0
-; CHECK-NEXT:    [[TMP22:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT:    [[TMP23:%.*]] = shl nuw nsw i32 [[TMP22]], 4
-; CHECK-NEXT:    [[TMP24:%.*]] = sub nsw i32 4, [[TMP23]]
-; CHECK-NEXT:    [[TMP25:%.*]] = sext i32 [[TMP24]] to i64
-; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[TMP25]]
-; CHECK-NEXT:    [[REVERSE6:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP17]])
-; CHECK-NEXT:    [[REVERSE7:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP18]])
-; CHECK-NEXT:    [[REVERSE8:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP19]])
-; CHECK-NEXT:    [[REVERSE9:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP20]])
-; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[REVERSE6]], <vscale x 4 x i32> [[REVERSE8]])
-; CHECK-NEXT:    [[INTERLEAVED_VEC10:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[REVERSE7]], <vscale x 4 x i32> [[REVERSE9]])
-; CHECK-NEXT:    [[INTERLEAVED_VEC11:%.*]] = call <vscale x 16 x i32> @llvm.vector.interleave2.nxv16i32(<vscale x 8 x i32> [[INTERLEAVED_VEC]], <vscale x 8 x i32> [[INTERLEAVED_VEC10]])
-; CHECK-NEXT:    store <vscale x 16 x i32> [[INTERLEAVED_VEC11]], ptr [[TMP26]], align 4
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
-; CHECK-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; CHECK-NEXT:    br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
-; CHECK:       middle.block:
-; CHECK-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
-; CHECK:       scalar.ph:
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.cond.cleanup:
-; CHECK-NEXT:    ret void
-; CHECK:       for.body:
-; CHECK-NEXT:    br i1 poison, label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP44:![0-9]+]]
-;
-entry:
-  br label %for.body
-for.cond.cleanup:                                 ; preds = %for.body
-  ret void
-for.body:                                         ; preds = %for.body, %entry
-  %indvars.iv = phi i64 [ 1023, %entry ], [ %indvars.iv.next, %for.body ]
-  %x = getelementptr inbounds %struct.xyzt, ptr %A, i64 %indvars.iv, i32 0
-  %load1 = load i32, ptr %x, align 4
-  %trunc = trunc i64 %indvars.iv to i32
-  %add = add nsw i32 %load1, %trunc
-  %y = getelementptr inbounds %struct.xyzt, ptr %A, i64 %indvars.iv, i32 1
-  %load2 = load i32, ptr %y, align 4
-  %sub = sub nsw i32 %load2, %trunc
-  %z = getelementptr inbounds %struct.xyzt, ptr %A, i64 %indvars.iv, i32 2
-  %load3 = load i32, ptr %z, align 4
-  %mul = mul nsw i32 %load3, %trunc
-  %t = getelementptr inbounds %struct.xyzt, ptr %A, i64 %indvars.iv, i32 3
-  %load4 = load i32, ptr %t, align 4
-  %shl = shl nuw nsw i32 %load4, %trunc
-  %x5 = getelementptr inbounds %struct.xyzt, ptr %B, i64 %indvars.iv, i32 0
-  store i32 %add, ptr %x5, align 4
-  %y8 = getelementptr inbounds %struct.xyzt, ptr %B, i64 %indvars.iv, i32 1
-  store i32 %sub, ptr %y8, align 4
-  %z5 = getelementptr inbounds %struct.xyzt, ptr %B, i64 %indvars.iv, i32 2
-  store i32 %mul, ptr %z5, align 4
-  %t8 = getelementptr inbounds %struct.xyzt, ptr %B, i64 %indvars.iv, i32 3
-  store i32 %shl, ptr %t8, align 4
-  %indvars.iv.next = add nsw i64 %indvars.iv, -1
-  %cmp = icmp sgt i64 %indvars.iv, 0
-  br i1 %cmp, label %for.body, label %for.cond.cleanup
-
-}
 attributes #1 = { "target-features"="+sve" vscale_range(1, 16) }
 attributes #0 = { "unsafe-fp-math"="true" "target-features"="+sve" vscale_range(1, 16) }
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll
index d4392bebdf37b..1a281fe7c6f7f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll
@@ -529,255 +529,3 @@ for.inc:
 for.end:
   ret void
 }
-
-; Expected to contain interleave2/deinterleave2 instructions
-;
-; void masked_strided_factor4(const unsigned char* restrict p,
-;                            unsigned char* restrict q,
-;                            unsigned char guard) {
-; for(ix=0; ix < 1024; ++ix) {
-;     if (ix > guard) {
-;         char left1 = p[4*ix];
-;         char right1 = p[4*ix + 1];
-;         char left2 = p[4*ix + 2];
-;         char right2 = p[4*ix + 3];
-;         char max1 = max(left1, right1);
-;         char max2 = max(left2, right2);
-;         q[4*ix] = max1;
-;         q[4*ix + 1] = 0 - max1;
-;         q[4*ix + 2] = max2;
-;         q[4*ix + 3] = 0 - max2;
-;     }
-; }
-;}
-define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr #0 {
-; SCALAR_TAIL_FOLDING-LABEL: define dso_local void @masked_strided_factor4
-; SCALAR_TAIL_FOLDING-SAME: (ptr noalias nocapture readonly [[P:%.*]], ptr noalias nocapture [[Q:%.*]], i8 zeroext [[GUARD:%.*]]) local_unnamed_addr #[[ATTR0]] {
-; SCALAR_TAIL_FOLDING-NEXT:  entry:
-; SCALAR_TAIL_FOLDING-NEXT:    [[CONV:%.*]] = zext i8 [[GUARD]] to i32
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP1:%.*]] = shl i32 [[TMP0]], 4
-; SCALAR_TAIL_FOLDING-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ugt i32 [[TMP1]], 1024
-; SCALAR_TAIL_FOLDING-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; SCALAR_TAIL_FOLDING:       vector.ph:
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP3:%.*]] = shl i32 [[TMP2]], 4
-; SCALAR_TAIL_FOLDING-NEXT:    [[N_MOD_VF:%.*]] = urem i32 1024, [[TMP3]]
-; SCALAR_TAIL_FOLDING-NEXT:    [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]]
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP5:%.*]] = shl i32 [[TMP4]], 4
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP6:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
-; SCALAR_TAIL_FOLDING-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP5]], i64 0
-; SCALAR_TAIL_FOLDING-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
-; SCALAR_TAIL_FOLDING-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
-; SCALAR_TAIL_FOLDING-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
-; SCALAR_TAIL_FOLDING-NEXT:    br label [[VECTOR_BODY:%.*]]
-; SCALAR_TAIL_FOLDING:       vector.body:
-; SCALAR_TAIL_FOLDING-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; SCALAR_TAIL_FOLDING-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP7:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP8:%.*]] = shl i32 [[INDEX]], 2
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP9]]
-; SCALAR_TAIL_FOLDING-NEXT:    [[INTERLEAVED_MASK:%.*]] = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1> [[TMP7]])
-; SCALAR_TAIL_FOLDING-NEXT:    [[INTERLEAVED_MASK1:%.*]] = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1> [[TMP7]])
-; SCALAR_TAIL_FOLDING-NEXT:    [[INTERLEAVED_MASK2:%.*]] = call <vscale x 64 x i1> @llvm.vector.interleave2.nxv64i1(<vscale x 32 x i1> [[INTERLEAVED_MASK]], <vscale x 32 x i1> [[INTERLEAVED_MASK1]])
-; SCALAR_TAIL_FOLDING-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <vscale x 64 x i8> @llvm.masked.load.nxv64i8.p0(ptr [[TMP10]], i32 1, <vscale x 64 x i1> [[INTERLEAVED_MASK2]], <vscale x 64 x i8> poison)
-; SCALAR_TAIL_FOLDING-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 32 x i8>, <vscale x 32 x i8> } @llvm.vector.deinterleave2.nxv64i8(<vscale x 64 x i8> [[WIDE_MASKED_VEC]])
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } [[STRIDED_VEC]], 0
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP12:%.*]] = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } [[STRIDED_VEC]], 1
-; SCALAR_TAIL_FOLDING-NEXT:    [[STRIDED_VEC3:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> [[TMP11]])
-; SCALAR_TAIL_FOLDING-NEXT:    [[STRIDED_VEC4:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> [[TMP12]])
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC3]], 0
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP14:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC4]], 0
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP15:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC3]], 1
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP16:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC4]], 1
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP17:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[TMP13]], <vscale x 16 x i8> [[TMP14]])
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP18:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP17]]
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP19:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[TMP15]], <vscale x 16 x i8> [[TMP16]])
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP20:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP19]]
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP21:%.*]] = sext i32 [[TMP8]] to i64
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP22:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP21]]
-; SCALAR_TAIL_FOLDING-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8> [[TMP17]], <vscale x 16 x i8> [[TMP19]])
-; SCALAR_TAIL_FOLDING-NEXT:    [[INTERLEAVED_VEC5:%.*]] = call <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8> [[TMP18]], <vscale x 16 x i8> [[TMP20]])
-; SCALAR_TAIL_FOLDING-NEXT:    [[INTERLEAVED_VEC6:%.*]] = call <vscale x 64 x i8> @llvm.vector.interleave2.nxv64i8(<vscale x 32 x i8> [[INTERLEAVED_VEC]], <vscale x 32 x i8> [[INTERLEAVED_VEC5]])
-; SCALAR_TAIL_FOLDING-NEXT:    [[INTERLEAVED_MASK7:%.*]] = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1> [[TMP7]])
-; SCALAR_TAIL_FOLDING-NEXT:    [[INTERLEAVED_MASK8:%.*]] = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1> [[TMP7]])
-; SCALAR_TAIL_FOLDING-NEXT:    [[INTERLEAVED_MASK9:%.*]] = call <vscale x 64 x i1> @llvm.vector.interleave2.nxv64i1(<vscale x 32 x i1> [[INTERLEAVED_MASK7]], <vscale x 32 x i1> [[INTERLEAVED_MASK8]])
-; SCALAR_TAIL_FOLDING-NEXT:    call void @llvm.masked.store.nxv64i8.p0(<vscale x 64 x i8> [[INTERLEAVED_VEC6]], ptr [[TMP22]], i32 1, <vscale x 64 x i1> [[INTERLEAVED_MASK9]])
-; SCALAR_TAIL_FOLDING-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]]
-; SCALAR_TAIL_FOLDING-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[DOTSPLAT]]
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP23:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; SCALAR_TAIL_FOLDING-NEXT:    br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
-; SCALAR_TAIL_FOLDING:       middle.block:
-; SCALAR_TAIL_FOLDING-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
-; SCALAR_TAIL_FOLDING-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
-; SCALAR_TAIL_FOLDING:       scalar.ph:
-; SCALAR_TAIL_FOLDING-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; SCALAR_TAIL_FOLDING-NEXT:    br label [[FOR_BODY:%.*]]
-; SCALAR_TAIL_FOLDING:       for.body:
-; SCALAR_TAIL_FOLDING-NEXT:    [[IX_024:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
-; SCALAR_TAIL_FOLDING-NEXT:    [[CMP1:%.*]] = icmp samesign ugt i32 [[IX_024]], [[CONV]]
-; SCALAR_TAIL_FOLDING-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
-; SCALAR_TAIL_FOLDING:       if.then:
-; SCALAR_TAIL_FOLDING-NEXT:    [[IDX0:%.*]] = shl nuw nsw i32 [[IX_024]], 2
-; SCALAR_TAIL_FOLDING-NEXT:    [[IDX1:%.*]] = or disjoint i32 [[IDX0]], 1
-; SCALAR_TAIL_FOLDING-NEXT:    [[IDX2:%.*]] = or disjoint i32 [[IDX0]], 2
-; SCALAR_TAIL_FOLDING-NEXT:    [[IDX3:%.*]] = or disjoint i32 [[IDX0]], 3
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP24:%.*]] = zext nneg i32 [[IDX0]] to i64
-; SCALAR_TAIL_FOLDING-NEXT:    [[ARRAY1IDX0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[TMP24]]
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP25:%.*]] = load i8, ptr [[ARRAY1IDX0]], align 1
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP26:%.*]] = zext nneg i32 [[IDX1]] to i64
-; SCALAR_TAIL_FOLDING-NEXT:    [[ARRAY1IDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[TMP26]]
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP27:%.*]] = load i8, ptr [[ARRAY1IDX1]], align 1
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP28:%.*]] = zext nneg i32 [[IDX2]] to i64
-; SCALAR_TAIL_FOLDING-NEXT:    [[ARRAY1IDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[TMP28]]
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP29:%.*]] = load i8, ptr [[ARRAY1IDX2]], align 1
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP30:%.*]] = zext nneg i32 [[IDX3]] to i64
-; SCALAR_TAIL_FOLDING-NEXT:    [[ARRAY1IDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[TMP30]]
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP31:%.*]] = load i8, ptr [[ARRAY1IDX3]], align 1
-; SCALAR_TAIL_FOLDING-NEXT:    [[SPEC_SELECT_I1:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP25]], i8 [[TMP27]])
-; SCALAR_TAIL_FOLDING-NEXT:    [[SUB1:%.*]] = sub i8 0, [[SPEC_SELECT_I1]]
-; SCALAR_TAIL_FOLDING-NEXT:    [[SPEC_SELECT_I2:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP29]], i8 [[TMP31]])
-; SCALAR_TAIL_FOLDING-NEXT:    [[SUB2:%.*]] = sub i8 0, [[SPEC_SELECT_I2]]
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP32:%.*]] = zext nneg i32 [[IDX0]] to i64
-; SCALAR_TAIL_FOLDING-NEXT:    [[ARRAY3IDX0:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 [[TMP32]]
-; SCALAR_TAIL_FOLDING-NEXT:    store i8 [[SPEC_SELECT_I1]], ptr [[ARRAY3IDX0]], align 1
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP33:%.*]] = zext nneg i32 [[IDX1]] to i64
-; SCALAR_TAIL_FOLDING-NEXT:    [[ARRAY3IDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 [[TMP33]]
-; SCALAR_TAIL_FOLDING-NEXT:    store i8 [[SUB1]], ptr [[ARRAY3IDX1]], align 1
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP34:%.*]] = zext nneg i32 [[IDX2]] to i64
-; SCALAR_TAIL_FOLDING-NEXT:    [[ARRAY3IDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 [[TMP34]]
-; SCALAR_TAIL_FOLDING-NEXT:    store i8 [[SPEC_SELECT_I2]], ptr [[ARRAY3IDX2]], align 1
-; SCALAR_TAIL_FOLDING-NEXT:    [[TMP35:%.*]] = zext nneg i32 [[IDX3]] to i64
-; SCALAR_TAIL_FOLDING-NEXT:    [[ARRAY3IDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 [[TMP35]]
-; SCALAR_TAIL_FOLDING-NEXT:    store i8 [[SUB2]], ptr [[ARRAY3IDX3]], align 1
-; SCALAR_TAIL_FOLDING-NEXT:    br label [[FOR_INC]]
-; SCALAR_TAIL_FOLDING:       for.inc:
-; SCALAR_TAIL_FOLDING-NEXT:    [[INC]] = add nuw nsw i32 [[IX_024]], 1
-; SCALAR_TAIL_FOLDING-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1024
-; SCALAR_TAIL_FOLDING-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
-; SCALAR_TAIL_FOLDING:       for.end:
-; SCALAR_TAIL_FOLDING-NEXT:    ret void
-;
-; PREDICATED_TAIL_FOLDING-LABEL: define dso_local void @masked_strided_factor4
-; PREDICATED_TAIL_FOLDING-SAME: (ptr noalias nocapture readonly [[P:%.*]], ptr noalias nocapture [[Q:%.*]], i8 zeroext [[GUARD:%.*]]) local_unnamed_addr #[[ATTR0]] {
-; PREDICATED_TAIL_FOLDING-NEXT:  entry:
-; PREDICATED_TAIL_FOLDING-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; PREDICATED_TAIL_FOLDING:       vector.ph:
-; PREDICATED_TAIL_FOLDING-NEXT:    [[CONV:%.*]] = zext i8 [[GUARD]] to i32
-; PREDICATED_TAIL_FOLDING-NEXT:    [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
-; PREDICATED_TAIL_FOLDING-NEXT:    [[TMP1:%.*]] = shl i32 [[TMP0]], 4
-; PREDICATED_TAIL_FOLDING-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
-; PREDICATED_TAIL_FOLDING-NEXT:    [[TMP3:%.*]] = shl i32 [[TMP2]], 4
-; PREDICATED_TAIL_FOLDING-NEXT:    [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]])
-; PREDICATED_TAIL_FOLDING-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024)
-; PREDICATED_TAIL_FOLDING-NEXT:    [[TMP5:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
-; PREDICATED_TAIL_FOLDING-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0
-; PREDICATED_TAIL_FOLDING-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
-; PREDICATED_TAIL_FOLDING-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
-; PREDICATED_TAIL_FOLDING-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
-; PREDICATED_TAIL_FOLDING-NEXT:    br label [[VECTOR_BODY:%.*]]
-; PREDICATED_TAIL_FOLDING:       vector.body:
-; PREDICATED_TAIL_FOLDING-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; PREDICATED_TAIL_FOLDING-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
-; PREDICATED_TAIL_FOLDING-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP5]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; PREDICATED_TAIL_FOLDING-NEXT:    [[TMP6:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; PREDICATED_TAIL_FOLDING-NEXT:    [[TMP7:%.*]] = select <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i1> [[TMP6]], <vscale x 16 x i1> zeroinitializer
-; PREDICATED_TAIL_FOLDING-NEXT:    [[TMP8:%.*]] = shl i32 [[INDEX]], 2
-; PREDICATED_TAIL_FOLDING-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-; PREDICATED_TAIL_FOLDING-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP9]]
-; PREDICATED_TAIL_FOLDING-NEXT:    [[INTERLEAVED_MASK:%.*]] = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1> [[TMP7]])
-; PREDICATED_TAIL_FOLDING-NEXT:    [[INTERLEAVED_MASK1:%.*]] = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1> [[TMP7]])
-; PREDICATED_TAIL_FOLDING-NEXT:    [[INTERLEAVED_MASK2:%.*]] = call <vscale x 64 x i1> @llvm.vector.interleave2.nxv64i1(<vscale x 32 x i1> [[INTERLEAVED_MASK]], <vscale x 32 x i1> [[INTERLEAVED_MASK1]])
-; PREDICATED_TAIL_FOLDING-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <vscale x 64 x i8> @llvm.masked.load.nxv64i8.p0(ptr [[TMP10]], i32 1, <vscale x 64 x i1> [[INTERLEAVED_MASK2]], <vscale x 64 x i8> poison)
-; PREDICATED_TAIL_FOLDING-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 32 x i8>, <vscale x 32 x i8> } @llvm.vector.deinterleave2.nxv64i8(<vscale x 64 x i8> [[WIDE_MASKED_VEC]])
-; PREDICATED_TAIL_FOLDING-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } [[STRIDED_VEC]], 0
-; PREDICATED_TAIL_FOLDING-NEXT:    [[TMP12:%.*]] = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } [[STRIDED_VEC]], 1
-; PREDICATED_TAIL_FOLDING-NEXT:    [[STRIDED_VEC3:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> [[TMP11]])
-; PREDICATED_TAIL_FOLDING-NEXT:    [[STRIDED_VEC4:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> [[TMP12]])
-; PREDICATED_TAIL_FOLDING-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC3]], 0
-; PREDICATED_TAIL_FOLDING-NEXT:    [[TMP14:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC4]], 0
-; PREDICATED_TAIL_FOLDING-NEXT:    [[TMP15:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC3]], 1
-; PREDICATED_TAIL_FOLDING-NEXT:    [[TMP16:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC4]], 1
-; PREDICATED_TAIL_FOLDING-NEXT:    [[TMP17:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[TMP13]], <vscale x 16 x i8> [[TMP14]])
-; PREDICATED_TAIL_FOLDING-NEXT:    [[TMP18:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP17]]
-; PREDICATED_TAIL_FOLDING-NEXT:    [[TMP19:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[TMP15]], <vscale x 16 x i8> [[TMP16]])
-; PREDICATED_TAIL_FOLDING-NEXT:    [[TMP20:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP19]]
-; PREDICATED_TAIL_FOLDING-NEXT:    [[TMP21:%.*]] = sext i32 [[TMP8]] to i64
-; PREDICATED_TAIL_FOLDING-NEXT:    [[TMP22:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP21]]
-; PREDICATED_TAIL_FOLDING-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8> [[TMP17]], <vscale x 16 x i8> [[TMP19]])
-; PREDICATED_TAIL_FOLDING-NEXT:    [[INTERLEAVED_VEC5:%.*]] = call <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8> [[TMP18]], <vscale x 16 x i8> [[TMP20]])
-; PREDICATED_TAIL_FOLDING-NEXT:    [[INTERLEAVED_VEC6:%.*]] = call <vscale x 64 x i8> @llvm.vector.interleave2.nxv64i8(<vscale x 32 x i8> [[INTERLEAVED_VEC]], <vscale x 32 x i8> [[INTERLEAVED_VEC5]])
-; PREDICATED_TAIL_FOLDING-NEXT:    [[INTERLEAVED_MASK7:%.*]] = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1> [[TMP7]])
-; PREDICATED_TAIL_FOLDING-NEXT:    [[INTERLEAVED_MASK8:%.*]] = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1> [[TMP7]])
-; PREDICATED_TAIL_FOLDING-NEXT:    [[INTERLEAVED_MASK9:%.*]] = call <vscale x 64 x i1> @llvm.vector.interleave2.nxv64i1(<vscale x 32 x i1> [[INTERLEAVED_MASK7]], <vscale x 32 x i1> [[INTERLEAVED_MASK8]])
-; PREDICATED_TAIL_FOLDING-NEXT:    call void @llvm.masked.store.nxv64i8.p0(<vscale x 64 x i8> [[INTERLEAVED_VEC6]], ptr [[TMP22]], i32 1, <vscale x 64 x i1> [[INTERLEAVED_MASK9]])
-; PREDICATED_TAIL_FOLDING-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]]
-; PREDICATED_TAIL_FOLDING-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP4]])
-; PREDICATED_TAIL_FOLDING-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[DOTSPLAT]]
-; PREDICATED_TAIL_FOLDING-NEXT:    [[TMP23:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
-; PREDICATED_TAIL_FOLDING-NEXT:    br i1 [[TMP23]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP8:![0-9]+]]
-; PREDICATED_TAIL_FOLDING:       middle.block:
-; PREDICATED_TAIL_FOLDING-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
-; PREDICATED_TAIL_FOLDING:       scalar.ph:
-; PREDICATED_TAIL_FOLDING-NEXT:    br label [[FOR_BODY:%.*]]
-; PREDICATED_TAIL_FOLDING:       for.body:
-; PREDICATED_TAIL_FOLDING-NEXT:    br i1 poison, label [[IF_THEN:%.*]], label [[FOR_INC:%.*]]
-; PREDICATED_TAIL_FOLDING:       if.then:
-; PREDICATED_TAIL_FOLDING-NEXT:    br label [[FOR_INC]]
-; PREDICATED_TAIL_FOLDING:       for.inc:
-; PREDICATED_TAIL_FOLDING-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
-; PREDICATED_TAIL_FOLDING:       for.end:
-; PREDICATED_TAIL_FOLDING-NEXT:    ret void
-;
-entry:
-  %conv = zext i8 %guard to i32
-  br label %for.body
-
-for.body:
-  %ix.024 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
-  %cmp1 = icmp ugt i32 %ix.024, %conv
-  br i1 %cmp1, label %if.then, label %for.inc
-
-if.then:
-  %idx0 = shl nuw nsw i32 %ix.024, 2
-  %idx1 = add i32 %idx0, 1
-  %idx2 = add i32 %idx0, 2
-  %idx3 = add i32 %idx0, 3
-
-  %array1idx0 = getelementptr inbounds i8, ptr %p, i32 %idx0
-  %0 = load i8, ptr %array1idx0, align 1
-  %array1idx1 = getelementptr inbounds i8, ptr %p, i32 %idx1
-  %1 = load i8, ptr %array1idx1, align 1
-  %array1idx2 = getelementptr inbounds i8, ptr %p, i32 %idx2
-  %2 = load i8, ptr %array1idx2, align 1
-  %array1idx3 = getelementptr inbounds i8, ptr %p, i32 %idx3
-  %3 = load i8, ptr %array1idx3, align 1
-
-  %cmp.i1 = icmp slt i8 %0, %1
-  %spec.select.i1 = select i1 %cmp.i1, i8 %1, i8 %0
-  %sub1 = sub i8 0, %spec.select.i1
-  %cmp.i2 = icmp slt i8 %2, %3
-  %spec.select.i2 = select i1 %cmp.i2, i8 %3, i8 %2
-  %sub2 = sub i8 0, %spec.select.i2
-
-  %array3idx0 = getelementptr inbounds i8, ptr %q, i32 %idx0
-  store i8 %spec.select.i1, ptr %array3idx0, align 1
-  %array3idx1 = getelementptr inbounds i8, ptr %q, i32 %idx1
-  store i8 %sub1, ptr %array3idx1, align 1
-  %array3idx2 = getelementptr inbounds i8, ptr %q, i32 %idx2
-  store i8 %spec.select.i2, ptr %array3idx2, align 1
-  %array3idx3 = getelementptr inbounds i8, ptr %q, i32 %idx3
-  store i8 %sub2, ptr %array3idx3, align 1
-
-  br label %for.inc
-
-for.inc:
-  %inc = add nuw nsw i32 %ix.024, 1
-  %exitcond = icmp eq i32 %inc, 1024
-  br i1 %exitcond, label %for.end, label %for.body
-
-for.end:
-  ret void
-}
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
index b1ff589fe51bf..bda4839dead51 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
@@ -9,7 +9,7 @@ define void @load_store_factor2_i32(ptr %p) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
@@ -17,88 +17,88 @@ define void @load_store_factor2_i32(ptr %p) {
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[I]], 0
-; CHECK-NEXT:    [[OFFSET0:%.*]] = shl i64 [[TMP6]], 1
-; CHECK-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET0]]
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[Q0]], align 4
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP7:%.*]] = shl i64 [[TMP6]], 1
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP8]], align 4
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
-; CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
-; CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
-; CHECK-NEXT:    [[TMP11:%.*]] = add <vscale x 4 x i32> [[TMP9]], splat (i32 1)
-; CHECK-NEXT:    [[TMP12:%.*]] = add <vscale x 4 x i32> [[TMP10]], splat (i32 2)
-; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[TMP11]], <vscale x 4 x i32> [[TMP12]])
-; CHECK-NEXT:    store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[Q0]], align 4
-; CHECK-NEXT:    [[NEXTI]] = add nuw i64 [[I]], [[TMP5]]
-; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[NEXTI]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
+; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
+; CHECK-NEXT:    [[TMP12:%.*]] = add <vscale x 4 x i32> [[TMP10]], splat (i32 1)
+; CHECK-NEXT:    [[TMP15:%.*]] = add <vscale x 4 x i32> [[TMP11]], splat (i32 2)
+; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[TMP12]], <vscale x 4 x i32> [[TMP15]])
+; CHECK-NEXT:    store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; CHECK-NEXT:    br label [[LOOP1:%.*]]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; CHECK-NEXT:    [[OFFSET2:%.*]] = shl i64 [[I1]], 1
-; CHECK-NEXT:    [[Q2:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET2]]
-; CHECK-NEXT:    [[X0:%.*]] = load i32, ptr [[Q2]], align 4
+; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; CHECK-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
+; CHECK-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
 ; CHECK-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
-; CHECK-NEXT:    store i32 [[Y0]], ptr [[Q2]], align 4
-; CHECK-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET2]], 1
+; CHECK-NEXT:    store i32 [[Y0]], ptr [[Q0]], align 4
+; CHECK-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
 ; CHECK-NEXT:    [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
 ; CHECK-NEXT:    [[X1:%.*]] = load i32, ptr [[Q1]], align 4
 ; CHECK-NEXT:    [[Y1:%.*]] = add i32 [[X1]], 2
 ; CHECK-NEXT:    store i32 [[Y1]], ptr [[Q1]], align 4
-; CHECK-NEXT:    [[NEXTI1]] = add i64 [[I1]], 1
-; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; CHECK-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; CHECK-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;
 ; FIXED-LABEL: @load_store_factor2_i32(
 ; FIXED-NEXT:  entry:
-; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; FIXED:       vector.ph:
-; FIXED-NEXT:    br label [[LOOP:%.*]]
+; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; FIXED:       vector.body:
-; FIXED-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[I]], 0
-; FIXED-NEXT:    [[OFFSET0:%.*]] = shl i64 [[TMP0]], 1
-; FIXED-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET0]]
-; FIXED-NEXT:    [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[Q0]], align 4
+; FIXED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; FIXED-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 1
+; FIXED-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP1]]
+; FIXED-NEXT:    [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP2]], align 4
 ; FIXED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 ; FIXED-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; FIXED-NEXT:    [[TMP3:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1)
-; FIXED-NEXT:    [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2)
-; FIXED-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; FIXED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-; FIXED-NEXT:    store <16 x i32> [[INTERLEAVED_VEC]], ptr [[Q0]], align 4
-; FIXED-NEXT:    [[NEXTI]] = add nuw i64 [[I]], 8
-; FIXED-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; FIXED-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
+; FIXED-NEXT:    [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1)
+; FIXED-NEXT:    [[TMP7:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2)
+; FIXED-NEXT:    [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP7]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; FIXED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> poison, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+; FIXED-NEXT:    store <16 x i32> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 4
+; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; FIXED-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; FIXED-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; FIXED:       middle.block:
 ; FIXED-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; FIXED:       scalar.ph:
-; FIXED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; FIXED-NEXT:    br label [[LOOP1:%.*]]
+; FIXED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; FIXED-NEXT:    br label [[LOOP:%.*]]
 ; FIXED:       loop:
-; FIXED-NEXT:    [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; FIXED-NEXT:    [[OFFSET2:%.*]] = shl i64 [[I1]], 1
-; FIXED-NEXT:    [[Q2:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET2]]
-; FIXED-NEXT:    [[X0:%.*]] = load i32, ptr [[Q2]], align 4
+; FIXED-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; FIXED-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; FIXED-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
+; FIXED-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
 ; FIXED-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
-; FIXED-NEXT:    store i32 [[Y0]], ptr [[Q2]], align 4
-; FIXED-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET2]], 1
+; FIXED-NEXT:    store i32 [[Y0]], ptr [[Q0]], align 4
+; FIXED-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
 ; FIXED-NEXT:    [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
 ; FIXED-NEXT:    [[X1:%.*]] = load i32, ptr [[Q1]], align 4
 ; FIXED-NEXT:    [[Y1:%.*]] = add i32 [[X1]], 2
 ; FIXED-NEXT:    store i32 [[Y1]], ptr [[Q1]], align 4
-; FIXED-NEXT:    [[NEXTI1]] = add i64 [[I1]], 1
-; FIXED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; FIXED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP3:![0-9]+]]
+; FIXED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; FIXED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; FIXED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
 ; FIXED:       exit:
 ; FIXED-NEXT:    ret void
 ;
@@ -107,7 +107,7 @@ define void @load_store_factor2_i32(ptr %p) {
 ; SCALABLE-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
 ; SCALABLE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
-; SCALABLE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; SCALABLE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; SCALABLE:       vector.ph:
 ; SCALABLE-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
@@ -115,44 +115,44 @@ define void @load_store_factor2_i32(ptr %p) {
 ; SCALABLE-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
 ; SCALABLE-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; SCALABLE-NEXT:    br label [[LOOP:%.*]]
+; SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; SCALABLE:       vector.body:
-; SCALABLE-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; SCALABLE-NEXT:    [[TMP6:%.*]] = add i64 [[I]], 0
-; SCALABLE-NEXT:    [[OFFSET0:%.*]] = shl i64 [[TMP6]], 1
-; SCALABLE-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET0]]
-; SCALABLE-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[Q0]], align 4
+; SCALABLE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
+; SCALABLE-NEXT:    [[TMP7:%.*]] = shl i64 [[TMP6]], 1
+; SCALABLE-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]]
+; SCALABLE-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP8]], align 4
 ; SCALABLE-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
-; SCALABLE-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
-; SCALABLE-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
-; SCALABLE-NEXT:    [[TMP11:%.*]] = add <vscale x 4 x i32> [[TMP9]], splat (i32 1)
-; SCALABLE-NEXT:    [[TMP12:%.*]] = add <vscale x 4 x i32> [[TMP10]], splat (i32 2)
-; SCALABLE-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[TMP11]], <vscale x 4 x i32> [[TMP12]])
-; SCALABLE-NEXT:    store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[Q0]], align 4
-; SCALABLE-NEXT:    [[NEXTI]] = add nuw i64 [[I]], [[TMP5]]
-; SCALABLE-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[NEXTI]], [[N_VEC]]
-; SCALABLE-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
+; SCALABLE-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
+; SCALABLE-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
+; SCALABLE-NEXT:    [[TMP12:%.*]] = add <vscale x 4 x i32> [[TMP10]], splat (i32 1)
+; SCALABLE-NEXT:    [[TMP15:%.*]] = add <vscale x 4 x i32> [[TMP11]], splat (i32 2)
+; SCALABLE-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[TMP12]], <vscale x 4 x i32> [[TMP15]])
+; SCALABLE-NEXT:    store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 4
+; SCALABLE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; SCALABLE-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; SCALABLE-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; SCALABLE:       middle.block:
 ; SCALABLE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
 ; SCALABLE-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; SCALABLE:       scalar.ph:
-; SCALABLE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; SCALABLE-NEXT:    br label [[LOOP1:%.*]]
+; SCALABLE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; SCALABLE-NEXT:    br label [[LOOP:%.*]]
 ; SCALABLE:       loop:
-; SCALABLE-NEXT:    [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; SCALABLE-NEXT:    [[OFFSET2:%.*]] = shl i64 [[I1]], 1
-; SCALABLE-NEXT:    [[Q2:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET2]]
-; SCALABLE-NEXT:    [[X0:%.*]] = load i32, ptr [[Q2]], align 4
+; SCALABLE-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; SCALABLE-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; SCALABLE-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
+; SCALABLE-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
 ; SCALABLE-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
-; SCALABLE-NEXT:    store i32 [[Y0]], ptr [[Q2]], align 4
-; SCALABLE-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET2]], 1
+; SCALABLE-NEXT:    store i32 [[Y0]], ptr [[Q0]], align 4
+; SCALABLE-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
 ; SCALABLE-NEXT:    [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
 ; SCALABLE-NEXT:    [[X1:%.*]] = load i32, ptr [[Q1]], align 4
 ; SCALABLE-NEXT:    [[Y1:%.*]] = add i32 [[X1]], 2
 ; SCALABLE-NEXT:    store i32 [[Y1]], ptr [[Q1]], align 4
-; SCALABLE-NEXT:    [[NEXTI1]] = add i64 [[I1]], 1
-; SCALABLE-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; SCALABLE-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP3:![0-9]+]]
+; SCALABLE-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; SCALABLE-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; SCALABLE-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
 ; SCALABLE:       exit:
 ; SCALABLE-NEXT:    ret void
 ;
@@ -186,7 +186,7 @@ define void @load_store_factor2_i64(ptr %p) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
@@ -194,88 +194,88 @@ define void @load_store_factor2_i64(ptr %p) {
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 2
-; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[I]], 0
-; CHECK-NEXT:    [[OFFSET0:%.*]] = shl i64 [[TMP6]], 1
-; CHECK-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]]
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 4 x i64>, ptr [[Q0]], align 8
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP7:%.*]] = shl i64 [[TMP6]], 1
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 4 x i64>, ptr [[TMP8]], align 8
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[WIDE_VEC]])
-; CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0
-; CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1
-; CHECK-NEXT:    [[TMP11:%.*]] = add <vscale x 2 x i64> [[TMP9]], splat (i64 1)
-; CHECK-NEXT:    [[TMP12:%.*]] = add <vscale x 2 x i64> [[TMP10]], splat (i64 2)
-; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[TMP11]], <vscale x 2 x i64> [[TMP12]])
-; CHECK-NEXT:    store <vscale x 4 x i64> [[INTERLEAVED_VEC]], ptr [[Q0]], align 8
-; CHECK-NEXT:    [[NEXTI]] = add nuw i64 [[I]], [[TMP5]]
-; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[NEXTI]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0
+; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1
+; CHECK-NEXT:    [[TMP12:%.*]] = add <vscale x 2 x i64> [[TMP10]], splat (i64 1)
+; CHECK-NEXT:    [[TMP15:%.*]] = add <vscale x 2 x i64> [[TMP11]], splat (i64 2)
+; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[TMP12]], <vscale x 2 x i64> [[TMP15]])
+; CHECK-NEXT:    store <vscale x 4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; CHECK-NEXT:    br label [[LOOP1:%.*]]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; CHECK-NEXT:    [[OFFSET2:%.*]] = shl i64 [[I1]], 1
-; CHECK-NEXT:    [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
-; CHECK-NEXT:    [[X0:%.*]] = load i64, ptr [[Q2]], align 8
+; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; CHECK-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; CHECK-NEXT:    [[X0:%.*]] = load i64, ptr [[Q0]], align 8
 ; CHECK-NEXT:    [[Y0:%.*]] = add i64 [[X0]], 1
-; CHECK-NEXT:    store i64 [[Y0]], ptr [[Q2]], align 8
-; CHECK-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET2]], 1
+; CHECK-NEXT:    store i64 [[Y0]], ptr [[Q0]], align 8
+; CHECK-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
 ; CHECK-NEXT:    [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
 ; CHECK-NEXT:    [[X1:%.*]] = load i64, ptr [[Q1]], align 8
 ; CHECK-NEXT:    [[Y1:%.*]] = add i64 [[X1]], 2
 ; CHECK-NEXT:    store i64 [[Y1]], ptr [[Q1]], align 8
-; CHECK-NEXT:    [[NEXTI1]] = add i64 [[I1]], 1
-; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; CHECK-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; CHECK-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;
 ; FIXED-LABEL: @load_store_factor2_i64(
 ; FIXED-NEXT:  entry:
-; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; FIXED:       vector.ph:
-; FIXED-NEXT:    br label [[LOOP:%.*]]
+; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; FIXED:       vector.body:
-; FIXED-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[I]], 0
-; FIXED-NEXT:    [[OFFSET0:%.*]] = shl i64 [[TMP0]], 1
-; FIXED-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]]
-; FIXED-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[Q0]], align 8
+; FIXED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; FIXED-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 1
+; FIXED-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP1]]
+; FIXED-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP2]], align 8
 ; FIXED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 ; FIXED-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; FIXED-NEXT:    [[TMP3:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1)
-; FIXED-NEXT:    [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2)
-; FIXED-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; FIXED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP5]], <8 x i64> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
-; FIXED-NEXT:    store <8 x i64> [[INTERLEAVED_VEC]], ptr [[Q0]], align 8
-; FIXED-NEXT:    [[NEXTI]] = add nuw i64 [[I]], 4
-; FIXED-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; FIXED-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; FIXED-NEXT:    [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1)
+; FIXED-NEXT:    [[TMP7:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; FIXED-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; FIXED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; FIXED-NEXT:    store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
+; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; FIXED-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; FIXED-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; FIXED:       middle.block:
 ; FIXED-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; FIXED:       scalar.ph:
-; FIXED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; FIXED-NEXT:    br label [[LOOP1:%.*]]
+; FIXED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; FIXED-NEXT:    br label [[LOOP:%.*]]
 ; FIXED:       loop:
-; FIXED-NEXT:    [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; FIXED-NEXT:    [[OFFSET2:%.*]] = shl i64 [[I1]], 1
-; FIXED-NEXT:    [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
-; FIXED-NEXT:    [[X0:%.*]] = load i64, ptr [[Q2]], align 8
+; FIXED-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; FIXED-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; FIXED-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; FIXED-NEXT:    [[X0:%.*]] = load i64, ptr [[Q0]], align 8
 ; FIXED-NEXT:    [[Y0:%.*]] = add i64 [[X0]], 1
-; FIXED-NEXT:    store i64 [[Y0]], ptr [[Q2]], align 8
-; FIXED-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET2]], 1
+; FIXED-NEXT:    store i64 [[Y0]], ptr [[Q0]], align 8
+; FIXED-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
 ; FIXED-NEXT:    [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
 ; FIXED-NEXT:    [[X1:%.*]] = load i64, ptr [[Q1]], align 8
 ; FIXED-NEXT:    [[Y1:%.*]] = add i64 [[X1]], 2
 ; FIXED-NEXT:    store i64 [[Y1]], ptr [[Q1]], align 8
-; FIXED-NEXT:    [[NEXTI1]] = add i64 [[I1]], 1
-; FIXED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; FIXED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP5:![0-9]+]]
+; FIXED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; FIXED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; FIXED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
 ; FIXED:       exit:
 ; FIXED-NEXT:    ret void
 ;
@@ -284,7 +284,7 @@ define void @load_store_factor2_i64(ptr %p) {
 ; SCALABLE-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
 ; SCALABLE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
-; SCALABLE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; SCALABLE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; SCALABLE:       vector.ph:
 ; SCALABLE-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
@@ -292,44 +292,44 @@ define void @load_store_factor2_i64(ptr %p) {
 ; SCALABLE-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
 ; SCALABLE-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 2
-; SCALABLE-NEXT:    br label [[LOOP:%.*]]
+; SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; SCALABLE:       vector.body:
-; SCALABLE-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; SCALABLE-NEXT:    [[TMP6:%.*]] = add i64 [[I]], 0
-; SCALABLE-NEXT:    [[OFFSET0:%.*]] = shl i64 [[TMP6]], 1
-; SCALABLE-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]]
-; SCALABLE-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 4 x i64>, ptr [[Q0]], align 8
+; SCALABLE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
+; SCALABLE-NEXT:    [[TMP7:%.*]] = shl i64 [[TMP6]], 1
+; SCALABLE-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP7]]
+; SCALABLE-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 4 x i64>, ptr [[TMP8]], align 8
 ; SCALABLE-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[WIDE_VEC]])
-; SCALABLE-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0
-; SCALABLE-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1
-; SCALABLE-NEXT:    [[TMP11:%.*]] = add <vscale x 2 x i64> [[TMP9]], splat (i64 1)
-; SCALABLE-NEXT:    [[TMP12:%.*]] = add <vscale x 2 x i64> [[TMP10]], splat (i64 2)
-; SCALABLE-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[TMP11]], <vscale x 2 x i64> [[TMP12]])
-; SCALABLE-NEXT:    store <vscale x 4 x i64> [[INTERLEAVED_VEC]], ptr [[Q0]], align 8
-; SCALABLE-NEXT:    [[NEXTI]] = add nuw i64 [[I]], [[TMP5]]
-; SCALABLE-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[NEXTI]], [[N_VEC]]
-; SCALABLE-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; SCALABLE-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0
+; SCALABLE-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1
+; SCALABLE-NEXT:    [[TMP12:%.*]] = add <vscale x 2 x i64> [[TMP10]], splat (i64 1)
+; SCALABLE-NEXT:    [[TMP15:%.*]] = add <vscale x 2 x i64> [[TMP11]], splat (i64 2)
+; SCALABLE-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[TMP12]], <vscale x 2 x i64> [[TMP15]])
+; SCALABLE-NEXT:    store <vscale x 4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 8
+; SCALABLE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; SCALABLE-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; SCALABLE-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; SCALABLE:       middle.block:
 ; SCALABLE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
 ; SCALABLE-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; SCALABLE:       scalar.ph:
-; SCALABLE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; SCALABLE-NEXT:    br label [[LOOP1:%.*]]
+; SCALABLE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; SCALABLE-NEXT:    br label [[LOOP:%.*]]
 ; SCALABLE:       loop:
-; SCALABLE-NEXT:    [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; SCALABLE-NEXT:    [[OFFSET2:%.*]] = shl i64 [[I1]], 1
-; SCALABLE-NEXT:    [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
-; SCALABLE-NEXT:    [[X0:%.*]] = load i64, ptr [[Q2]], align 8
+; SCALABLE-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; SCALABLE-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; SCALABLE-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; SCALABLE-NEXT:    [[X0:%.*]] = load i64, ptr [[Q0]], align 8
 ; SCALABLE-NEXT:    [[Y0:%.*]] = add i64 [[X0]], 1
-; SCALABLE-NEXT:    store i64 [[Y0]], ptr [[Q2]], align 8
-; SCALABLE-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET2]], 1
+; SCALABLE-NEXT:    store i64 [[Y0]], ptr [[Q0]], align 8
+; SCALABLE-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
 ; SCALABLE-NEXT:    [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
 ; SCALABLE-NEXT:    [[X1:%.*]] = load i64, ptr [[Q1]], align 8
 ; SCALABLE-NEXT:    [[Y1:%.*]] = add i64 [[X1]], 2
 ; SCALABLE-NEXT:    store i64 [[Y1]], ptr [[Q1]], align 8
-; SCALABLE-NEXT:    [[NEXTI1]] = add i64 [[I1]], 1
-; SCALABLE-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; SCALABLE-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP5:![0-9]+]]
+; SCALABLE-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; SCALABLE-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; SCALABLE-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
 ; SCALABLE:       exit:
 ; SCALABLE-NEXT:    ret void
 ;
@@ -360,42 +360,42 @@ exit:
 define void @load_store_factor3_i32(ptr %p) {
 ; CHECK-LABEL: @load_store_factor3_i32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[I]], 0
-; CHECK-NEXT:    [[OFFSET0:%.*]] = mul i64 [[TMP0]], 3
-; CHECK-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET0]]
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <24 x i32>, ptr [[Q0]], align 4
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP1]]
+; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <24 x i32>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
 ; CHECK-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
 ; CHECK-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
-; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1)
-; CHECK-NEXT:    [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2)
-; CHECK-NEXT:    [[TMP5:%.*]] = add <8 x i32> [[STRIDED_VEC2]], splat (i32 3)
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <16 x i32> [[TMP6]], <16 x i32> [[TMP7]], <24 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <24 x i32> [[TMP8]], <24 x i32> poison, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
-; CHECK-NEXT:    store <24 x i32> [[INTERLEAVED_VEC]], ptr [[Q0]], align 4
-; CHECK-NEXT:    [[NEXTI]] = add nuw i64 [[I]], 8
-; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1)
+; CHECK-NEXT:    [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2)
+; CHECK-NEXT:    [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], splat (i32 3)
+; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> [[TMP12]], <24 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <24 x i32> [[TMP13]], <24 x i32> poison, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
+; CHECK-NEXT:    store <24 x i32> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; CHECK-NEXT:    br label [[LOOP1:%.*]]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; CHECK-NEXT:    [[OFFSET3:%.*]] = mul i64 [[I1]], 3
-; CHECK-NEXT:    [[Q3:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET3]]
-; CHECK-NEXT:    [[X0:%.*]] = load i32, ptr [[Q3]], align 4
+; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[OFFSET0:%.*]] = mul i64 [[I]], 3
+; CHECK-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
+; CHECK-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
 ; CHECK-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
-; CHECK-NEXT:    store i32 [[Y0]], ptr [[Q3]], align 4
-; CHECK-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET3]], 1
+; CHECK-NEXT:    store i32 [[Y0]], ptr [[Q0]], align 4
+; CHECK-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
 ; CHECK-NEXT:    [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
 ; CHECK-NEXT:    [[X1:%.*]] = load i32, ptr [[Q1]], align 4
 ; CHECK-NEXT:    [[Y1:%.*]] = add i32 [[X1]], 2
@@ -405,50 +405,50 @@ define void @load_store_factor3_i32(ptr %p) {
 ; CHECK-NEXT:    [[X2:%.*]] = load i32, ptr [[Q2]], align 4
 ; CHECK-NEXT:    [[Y2:%.*]] = add i32 [[X2]], 3
 ; CHECK-NEXT:    store i32 [[Y2]], ptr [[Q2]], align 4
-; CHECK-NEXT:    [[NEXTI1]] = add i64 [[I1]], 1
-; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; CHECK-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; CHECK-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;
 ; FIXED-LABEL: @load_store_factor3_i32(
 ; FIXED-NEXT:  entry:
-; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; FIXED:       vector.ph:
-; FIXED-NEXT:    br label [[LOOP:%.*]]
+; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; FIXED:       vector.body:
-; FIXED-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[I]], 0
-; FIXED-NEXT:    [[OFFSET0:%.*]] = mul i64 [[TMP0]], 3
-; FIXED-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET0]]
-; FIXED-NEXT:    [[WIDE_VEC:%.*]] = load <24 x i32>, ptr [[Q0]], align 4
+; FIXED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; FIXED-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 3
+; FIXED-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP1]]
+; FIXED-NEXT:    [[WIDE_VEC:%.*]] = load <24 x i32>, ptr [[TMP2]], align 4
 ; FIXED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
 ; FIXED-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
 ; FIXED-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
-; FIXED-NEXT:    [[TMP3:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1)
-; FIXED-NEXT:    [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2)
-; FIXED-NEXT:    [[TMP5:%.*]] = add <8 x i32> [[STRIDED_VEC2]], splat (i32 3)
-; FIXED-NEXT:    [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; FIXED-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; FIXED-NEXT:    [[TMP8:%.*]] = shufflevector <16 x i32> [[TMP6]], <16 x i32> [[TMP7]], <24 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-; FIXED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <24 x i32> [[TMP8]], <24 x i32> poison, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
-; FIXED-NEXT:    store <24 x i32> [[INTERLEAVED_VEC]], ptr [[Q0]], align 4
-; FIXED-NEXT:    [[NEXTI]] = add nuw i64 [[I]], 8
-; FIXED-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; FIXED-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
+; FIXED-NEXT:    [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1)
+; FIXED-NEXT:    [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2)
+; FIXED-NEXT:    [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], splat (i32 3)
+; FIXED-NEXT:    [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; FIXED-NEXT:    [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; FIXED-NEXT:    [[TMP13:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> [[TMP12]], <24 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; FIXED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <24 x i32> [[TMP13]], <24 x i32> poison, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
+; FIXED-NEXT:    store <24 x i32> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 4
+; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; FIXED-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; FIXED-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; FIXED:       middle.block:
 ; FIXED-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; FIXED:       scalar.ph:
-; FIXED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; FIXED-NEXT:    br label [[LOOP1:%.*]]
+; FIXED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; FIXED-NEXT:    br label [[LOOP:%.*]]
 ; FIXED:       loop:
-; FIXED-NEXT:    [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; FIXED-NEXT:    [[OFFSET3:%.*]] = mul i64 [[I1]], 3
-; FIXED-NEXT:    [[Q3:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET3]]
-; FIXED-NEXT:    [[X0:%.*]] = load i32, ptr [[Q3]], align 4
+; FIXED-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; FIXED-NEXT:    [[OFFSET0:%.*]] = mul i64 [[I]], 3
+; FIXED-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
+; FIXED-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
 ; FIXED-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
-; FIXED-NEXT:    store i32 [[Y0]], ptr [[Q3]], align 4
-; FIXED-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET3]], 1
+; FIXED-NEXT:    store i32 [[Y0]], ptr [[Q0]], align 4
+; FIXED-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
 ; FIXED-NEXT:    [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
 ; FIXED-NEXT:    [[X1:%.*]] = load i32, ptr [[Q1]], align 4
 ; FIXED-NEXT:    [[Y1:%.*]] = add i32 [[X1]], 2
@@ -458,50 +458,50 @@ define void @load_store_factor3_i32(ptr %p) {
 ; FIXED-NEXT:    [[X2:%.*]] = load i32, ptr [[Q2]], align 4
 ; FIXED-NEXT:    [[Y2:%.*]] = add i32 [[X2]], 3
 ; FIXED-NEXT:    store i32 [[Y2]], ptr [[Q2]], align 4
-; FIXED-NEXT:    [[NEXTI1]] = add i64 [[I1]], 1
-; FIXED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; FIXED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP7:![0-9]+]]
+; FIXED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; FIXED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; FIXED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
 ; FIXED:       exit:
 ; FIXED-NEXT:    ret void
 ;
 ; SCALABLE-LABEL: @load_store_factor3_i32(
 ; SCALABLE-NEXT:  entry:
-; SCALABLE-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; SCALABLE-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; SCALABLE:       vector.ph:
-; SCALABLE-NEXT:    br label [[LOOP:%.*]]
+; SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; SCALABLE:       vector.body:
-; SCALABLE-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; SCALABLE-NEXT:    [[TMP0:%.*]] = add i64 [[I]], 0
-; SCALABLE-NEXT:    [[OFFSET0:%.*]] = mul i64 [[TMP0]], 3
-; SCALABLE-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET0]]
-; SCALABLE-NEXT:    [[WIDE_VEC:%.*]] = load <24 x i32>, ptr [[Q0]], align 4
+; SCALABLE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; SCALABLE-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 3
+; SCALABLE-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP1]]
+; SCALABLE-NEXT:    [[WIDE_VEC:%.*]] = load <24 x i32>, ptr [[TMP2]], align 4
 ; SCALABLE-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
 ; SCALABLE-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
 ; SCALABLE-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
-; SCALABLE-NEXT:    [[TMP3:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1)
-; SCALABLE-NEXT:    [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2)
-; SCALABLE-NEXT:    [[TMP5:%.*]] = add <8 x i32> [[STRIDED_VEC2]], splat (i32 3)
-; SCALABLE-NEXT:    [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SCALABLE-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SCALABLE-NEXT:    [[TMP8:%.*]] = shufflevector <16 x i32> [[TMP6]], <16 x i32> [[TMP7]], <24 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-; SCALABLE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <24 x i32> [[TMP8]], <24 x i32> poison, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
-; SCALABLE-NEXT:    store <24 x i32> [[INTERLEAVED_VEC]], ptr [[Q0]], align 4
-; SCALABLE-NEXT:    [[NEXTI]] = add nuw i64 [[I]], 8
-; SCALABLE-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; SCALABLE-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
+; SCALABLE-NEXT:    [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1)
+; SCALABLE-NEXT:    [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2)
+; SCALABLE-NEXT:    [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], splat (i32 3)
+; SCALABLE-NEXT:    [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SCALABLE-NEXT:    [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SCALABLE-NEXT:    [[TMP13:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> [[TMP12]], <24 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; SCALABLE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <24 x i32> [[TMP13]], <24 x i32> poison, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
+; SCALABLE-NEXT:    store <24 x i32> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 4
+; SCALABLE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; SCALABLE-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; SCALABLE-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; SCALABLE:       middle.block:
 ; SCALABLE-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; SCALABLE:       scalar.ph:
-; SCALABLE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; SCALABLE-NEXT:    br label [[LOOP1:%.*]]
+; SCALABLE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; SCALABLE-NEXT:    br label [[LOOP:%.*]]
 ; SCALABLE:       loop:
-; SCALABLE-NEXT:    [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; SCALABLE-NEXT:    [[OFFSET3:%.*]] = mul i64 [[I1]], 3
-; SCALABLE-NEXT:    [[Q3:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET3]]
-; SCALABLE-NEXT:    [[X0:%.*]] = load i32, ptr [[Q3]], align 4
+; SCALABLE-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; SCALABLE-NEXT:    [[OFFSET0:%.*]] = mul i64 [[I]], 3
+; SCALABLE-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
+; SCALABLE-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
 ; SCALABLE-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
-; SCALABLE-NEXT:    store i32 [[Y0]], ptr [[Q3]], align 4
-; SCALABLE-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET3]], 1
+; SCALABLE-NEXT:    store i32 [[Y0]], ptr [[Q0]], align 4
+; SCALABLE-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
 ; SCALABLE-NEXT:    [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
 ; SCALABLE-NEXT:    [[X1:%.*]] = load i32, ptr [[Q1]], align 4
 ; SCALABLE-NEXT:    [[Y1:%.*]] = add i32 [[X1]], 2
@@ -511,9 +511,9 @@ define void @load_store_factor3_i32(ptr %p) {
 ; SCALABLE-NEXT:    [[X2:%.*]] = load i32, ptr [[Q2]], align 4
 ; SCALABLE-NEXT:    [[Y2:%.*]] = add i32 [[X2]], 3
 ; SCALABLE-NEXT:    store i32 [[Y2]], ptr [[Q2]], align 4
-; SCALABLE-NEXT:    [[NEXTI1]] = add i64 [[I1]], 1
-; SCALABLE-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; SCALABLE-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP7:![0-9]+]]
+; SCALABLE-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; SCALABLE-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; SCALABLE-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
 ; SCALABLE:       exit:
 ; SCALABLE-NEXT:    ret void
 ;
@@ -550,42 +550,42 @@ exit:
 define void @load_store_factor3_i64(ptr %p) {
 ; CHECK-LABEL: @load_store_factor3_i64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[I]], 0
-; CHECK-NEXT:    [[OFFSET0:%.*]] = mul i64 [[TMP0]], 3
-; CHECK-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]]
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <12 x i64>, ptr [[Q0]], align 8
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP1]]
+; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <12 x i64>, ptr [[TMP2]], align 8
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
 ; CHECK-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
 ; CHECK-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
-; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1)
-; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2)
-; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i64> [[STRIDED_VEC2]], splat (i64 3)
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <8 x i64> [[TMP6]], <8 x i64> [[TMP7]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i64> [[TMP8]], <12 x i64> poison, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
-; CHECK-NEXT:    store <12 x i64> [[INTERLEAVED_VEC]], ptr [[Q0]], align 8
-; CHECK-NEXT:    [[NEXTI]] = add nuw i64 [[I]], 4
-; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1)
+; CHECK-NEXT:    [[TMP6:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; CHECK-NEXT:    [[TMP9:%.*]] = add <4 x i64> [[STRIDED_VEC2]], splat (i64 3)
+; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <8 x i64> [[TMP11]], <8 x i64> [[TMP12]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i64> [[TMP13]], <12 x i64> poison, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+; CHECK-NEXT:    store <12 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; CHECK-NEXT:    br label [[LOOP1:%.*]]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; CHECK-NEXT:    [[OFFSET3:%.*]] = mul i64 [[I1]], 3
-; CHECK-NEXT:    [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]]
-; CHECK-NEXT:    [[X0:%.*]] = load i64, ptr [[Q3]], align 8
+; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[OFFSET0:%.*]] = mul i64 [[I]], 3
+; CHECK-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; CHECK-NEXT:    [[X0:%.*]] = load i64, ptr [[Q0]], align 8
 ; CHECK-NEXT:    [[Y0:%.*]] = add i64 [[X0]], 1
-; CHECK-NEXT:    store i64 [[Y0]], ptr [[Q3]], align 8
-; CHECK-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET3]], 1
+; CHECK-NEXT:    store i64 [[Y0]], ptr [[Q0]], align 8
+; CHECK-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
 ; CHECK-NEXT:    [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
 ; CHECK-NEXT:    [[X1:%.*]] = load i64, ptr [[Q1]], align 8
 ; CHECK-NEXT:    [[Y1:%.*]] = add i64 [[X1]], 2
@@ -595,50 +595,50 @@ define void @load_store_factor3_i64(ptr %p) {
 ; CHECK-NEXT:    [[X2:%.*]] = load i64, ptr [[Q2]], align 8
 ; CHECK-NEXT:    [[Y2:%.*]] = add i64 [[X2]], 3
 ; CHECK-NEXT:    store i64 [[Y2]], ptr [[Q2]], align 8
-; CHECK-NEXT:    [[NEXTI1]] = add i64 [[I1]], 1
-; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; CHECK-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; CHECK-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;
 ; FIXED-LABEL: @load_store_factor3_i64(
 ; FIXED-NEXT:  entry:
-; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; FIXED:       vector.ph:
-; FIXED-NEXT:    br label [[LOOP:%.*]]
+; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; FIXED:       vector.body:
-; FIXED-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[I]], 0
-; FIXED-NEXT:    [[OFFSET0:%.*]] = mul i64 [[TMP0]], 3
-; FIXED-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]]
-; FIXED-NEXT:    [[WIDE_VEC:%.*]] = load <12 x i64>, ptr [[Q0]], align 8
+; FIXED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; FIXED-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 3
+; FIXED-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP1]]
+; FIXED-NEXT:    [[WIDE_VEC:%.*]] = load <12 x i64>, ptr [[TMP2]], align 8
 ; FIXED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
 ; FIXED-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
 ; FIXED-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
-; FIXED-NEXT:    [[TMP3:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1)
-; FIXED-NEXT:    [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2)
-; FIXED-NEXT:    [[TMP5:%.*]] = add <4 x i64> [[STRIDED_VEC2]], splat (i64 3)
-; FIXED-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; FIXED-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; FIXED-NEXT:    [[TMP8:%.*]] = shufflevector <8 x i64> [[TMP6]], <8 x i64> [[TMP7]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
-; FIXED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i64> [[TMP8]], <12 x i64> poison, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
-; FIXED-NEXT:    store <12 x i64> [[INTERLEAVED_VEC]], ptr [[Q0]], align 8
-; FIXED-NEXT:    [[NEXTI]] = add nuw i64 [[I]], 4
-; FIXED-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; FIXED-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
+; FIXED-NEXT:    [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1)
+; FIXED-NEXT:    [[TMP6:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; FIXED-NEXT:    [[TMP9:%.*]] = add <4 x i64> [[STRIDED_VEC2]], splat (i64 3)
+; FIXED-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; FIXED-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; FIXED-NEXT:    [[TMP13:%.*]] = shufflevector <8 x i64> [[TMP11]], <8 x i64> [[TMP12]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; FIXED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i64> [[TMP13]], <12 x i64> poison, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+; FIXED-NEXT:    store <12 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
+; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; FIXED-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; FIXED-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; FIXED:       middle.block:
 ; FIXED-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; FIXED:       scalar.ph:
-; FIXED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; FIXED-NEXT:    br label [[LOOP1:%.*]]
+; FIXED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; FIXED-NEXT:    br label [[LOOP:%.*]]
 ; FIXED:       loop:
-; FIXED-NEXT:    [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; FIXED-NEXT:    [[OFFSET3:%.*]] = mul i64 [[I1]], 3
-; FIXED-NEXT:    [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]]
-; FIXED-NEXT:    [[X0:%.*]] = load i64, ptr [[Q3]], align 8
+; FIXED-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; FIXED-NEXT:    [[OFFSET0:%.*]] = mul i64 [[I]], 3
+; FIXED-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; FIXED-NEXT:    [[X0:%.*]] = load i64, ptr [[Q0]], align 8
 ; FIXED-NEXT:    [[Y0:%.*]] = add i64 [[X0]], 1
-; FIXED-NEXT:    store i64 [[Y0]], ptr [[Q3]], align 8
-; FIXED-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET3]], 1
+; FIXED-NEXT:    store i64 [[Y0]], ptr [[Q0]], align 8
+; FIXED-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
 ; FIXED-NEXT:    [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
 ; FIXED-NEXT:    [[X1:%.*]] = load i64, ptr [[Q1]], align 8
 ; FIXED-NEXT:    [[Y1:%.*]] = add i64 [[X1]], 2
@@ -648,50 +648,50 @@ define void @load_store_factor3_i64(ptr %p) {
 ; FIXED-NEXT:    [[X2:%.*]] = load i64, ptr [[Q2]], align 8
 ; FIXED-NEXT:    [[Y2:%.*]] = add i64 [[X2]], 3
 ; FIXED-NEXT:    store i64 [[Y2]], ptr [[Q2]], align 8
-; FIXED-NEXT:    [[NEXTI1]] = add i64 [[I1]], 1
-; FIXED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; FIXED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP9:![0-9]+]]
+; FIXED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; FIXED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; FIXED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
 ; FIXED:       exit:
 ; FIXED-NEXT:    ret void
 ;
 ; SCALABLE-LABEL: @load_store_factor3_i64(
 ; SCALABLE-NEXT:  entry:
-; SCALABLE-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; SCALABLE-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; SCALABLE:       vector.ph:
-; SCALABLE-NEXT:    br label [[LOOP:%.*]]
+; SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; SCALABLE:       vector.body:
-; SCALABLE-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; SCALABLE-NEXT:    [[TMP0:%.*]] = add i64 [[I]], 0
-; SCALABLE-NEXT:    [[OFFSET0:%.*]] = mul i64 [[TMP0]], 3
-; SCALABLE-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]]
-; SCALABLE-NEXT:    [[WIDE_VEC:%.*]] = load <12 x i64>, ptr [[Q0]], align 8
+; SCALABLE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; SCALABLE-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 3
+; SCALABLE-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP1]]
+; SCALABLE-NEXT:    [[WIDE_VEC:%.*]] = load <12 x i64>, ptr [[TMP2]], align 8
 ; SCALABLE-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
 ; SCALABLE-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
 ; SCALABLE-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
-; SCALABLE-NEXT:    [[TMP3:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1)
-; SCALABLE-NEXT:    [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2)
-; SCALABLE-NEXT:    [[TMP5:%.*]] = add <4 x i64> [[STRIDED_VEC2]], splat (i64 3)
-; SCALABLE-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; SCALABLE-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; SCALABLE-NEXT:    [[TMP8:%.*]] = shufflevector <8 x i64> [[TMP6]], <8 x i64> [[TMP7]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
-; SCALABLE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i64> [[TMP8]], <12 x i64> poison, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
-; SCALABLE-NEXT:    store <12 x i64> [[INTERLEAVED_VEC]], ptr [[Q0]], align 8
-; SCALABLE-NEXT:    [[NEXTI]] = add nuw i64 [[I]], 4
-; SCALABLE-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; SCALABLE-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
+; SCALABLE-NEXT:    [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1)
+; SCALABLE-NEXT:    [[TMP6:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; SCALABLE-NEXT:    [[TMP9:%.*]] = add <4 x i64> [[STRIDED_VEC2]], splat (i64 3)
+; SCALABLE-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SCALABLE-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; SCALABLE-NEXT:    [[TMP13:%.*]] = shufflevector <8 x i64> [[TMP11]], <8 x i64> [[TMP12]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; SCALABLE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i64> [[TMP13]], <12 x i64> poison, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+; SCALABLE-NEXT:    store <12 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
+; SCALABLE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; SCALABLE-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; SCALABLE-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; SCALABLE:       middle.block:
 ; SCALABLE-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; SCALABLE:       scalar.ph:
-; SCALABLE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; SCALABLE-NEXT:    br label [[LOOP1:%.*]]
+; SCALABLE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; SCALABLE-NEXT:    br label [[LOOP:%.*]]
 ; SCALABLE:       loop:
-; SCALABLE-NEXT:    [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; SCALABLE-NEXT:    [[OFFSET3:%.*]] = mul i64 [[I1]], 3
-; SCALABLE-NEXT:    [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]]
-; SCALABLE-NEXT:    [[X0:%.*]] = load i64, ptr [[Q3]], align 8
+; SCALABLE-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; SCALABLE-NEXT:    [[OFFSET0:%.*]] = mul i64 [[I]], 3
+; SCALABLE-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; SCALABLE-NEXT:    [[X0:%.*]] = load i64, ptr [[Q0]], align 8
 ; SCALABLE-NEXT:    [[Y0:%.*]] = add i64 [[X0]], 1
-; SCALABLE-NEXT:    store i64 [[Y0]], ptr [[Q3]], align 8
-; SCALABLE-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET3]], 1
+; SCALABLE-NEXT:    store i64 [[Y0]], ptr [[Q0]], align 8
+; SCALABLE-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
 ; SCALABLE-NEXT:    [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
 ; SCALABLE-NEXT:    [[X1:%.*]] = load i64, ptr [[Q1]], align 8
 ; SCALABLE-NEXT:    [[Y1:%.*]] = add i64 [[X1]], 2
@@ -701,9 +701,9 @@ define void @load_store_factor3_i64(ptr %p) {
 ; SCALABLE-NEXT:    [[X2:%.*]] = load i64, ptr [[Q2]], align 8
 ; SCALABLE-NEXT:    [[Y2:%.*]] = add i64 [[X2]], 3
 ; SCALABLE-NEXT:    store i64 [[Y2]], ptr [[Q2]], align 8
-; SCALABLE-NEXT:    [[NEXTI1]] = add i64 [[I1]], 1
-; SCALABLE-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; SCALABLE-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP9:![0-9]+]]
+; SCALABLE-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; SCALABLE-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; SCALABLE-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
 ; SCALABLE:       exit:
 ; SCALABLE-NEXT:    ret void
 ;
@@ -740,75 +740,56 @@ exit:
 define void @load_store_factor8(ptr %p) {
 ; CHECK-LABEL: @load_store_factor8(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
-; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[I]], 0
-; CHECK-NEXT:    [[OFFSET0:%.*]] = shl i64 [[TMP3]], 3
-; CHECK-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]]
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 8 x i64>, ptr [[Q0]], align 8
-; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i64>, <vscale x 4 x i64> } @llvm.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> [[WIDE_VEC]])
-; CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } [[STRIDED_VEC]], 0
-; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } [[STRIDED_VEC]], 1
-; CHECK-NEXT:    [[STRIDED_VEC1:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[TMP6]])
-; CHECK-NEXT:    [[STRIDED_VEC2:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[TMP7]])
-; CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC1]], 0
-; CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC2]], 0
-; CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC1]], 1
-; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC2]], 1
-; CHECK-NEXT:    [[STRIDED_VEC3:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP8]])
-; CHECK-NEXT:    [[STRIDED_VEC4:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP9]])
-; CHECK-NEXT:    [[STRIDED_VEC5:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP10]])
-; CHECK-NEXT:    [[STRIDED_VEC6:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP11]])
-; CHECK-NEXT:    [[TMP12:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC3]], 0
-; CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC4]], 0
-; CHECK-NEXT:    [[TMP14:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC5]], 0
-; CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC6]], 0
-; CHECK-NEXT:    [[TMP16:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC3]], 1
-; CHECK-NEXT:    [[TMP17:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC4]], 1
-; CHECK-NEXT:    [[TMP18:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC5]], 1
-; CHECK-NEXT:    [[TMP19:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC6]], 1
-; CHECK-NEXT:    [[TMP20:%.*]] = add <vscale x 1 x i64> [[TMP12]], splat (i64 1)
-; CHECK-NEXT:    [[TMP21:%.*]] = add <vscale x 1 x i64> [[TMP13]], splat (i64 2)
-; CHECK-NEXT:    [[TMP22:%.*]] = add <vscale x 1 x i64> [[TMP14]], splat (i64 3)
-; CHECK-NEXT:    [[TMP23:%.*]] = add <vscale x 1 x i64> [[TMP15]], splat (i64 4)
-; CHECK-NEXT:    [[TMP24:%.*]] = add <vscale x 1 x i64> [[TMP16]], splat (i64 5)
-; CHECK-NEXT:    [[TMP25:%.*]] = add <vscale x 1 x i64> [[TMP17]], splat (i64 6)
-; CHECK-NEXT:    [[TMP26:%.*]] = add <vscale x 1 x i64> [[TMP18]], splat (i64 7)
-; CHECK-NEXT:    [[TMP27:%.*]] = add <vscale x 1 x i64> [[TMP19]], splat (i64 8)
-; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 2 x i64> @llvm.vector.interleave2.nxv2i64(<vscale x 1 x i64> [[TMP20]], <vscale x 1 x i64> [[TMP24]])
-; CHECK-NEXT:    [[INTERLEAVED_VEC7:%.*]] = call <vscale x 2 x i64> @llvm.vector.interleave2.nxv2i64(<vscale x 1 x i64> [[TMP21]], <vscale x 1 x i64> [[TMP25]])
-; CHECK-NEXT:    [[INTERLEAVED_VEC8:%.*]] = call <vscale x 2 x i64> @llvm.vector.interleave2.nxv2i64(<vscale x 1 x i64> [[TMP22]], <vscale x 1 x i64> [[TMP26]])
-; CHECK-NEXT:    [[INTERLEAVED_VEC9:%.*]] = call <vscale x 2 x i64> @llvm.vector.interleave2.nxv2i64(<vscale x 1 x i64> [[TMP23]], <vscale x 1 x i64> [[TMP27]])
-; CHECK-NEXT:    [[INTERLEAVED_VEC10:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[INTERLEAVED_VEC]], <vscale x 2 x i64> [[INTERLEAVED_VEC8]])
-; CHECK-NEXT:    [[INTERLEAVED_VEC11:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[INTERLEAVED_VEC7]], <vscale x 2 x i64> [[INTERLEAVED_VEC9]])
-; CHECK-NEXT:    [[INTERLEAVED_VEC12:%.*]] = call <vscale x 8 x i64> @llvm.vector.interleave2.nxv8i64(<vscale x 4 x i64> [[INTERLEAVED_VEC10]], <vscale x 4 x i64> [[INTERLEAVED_VEC11]])
-; CHECK-NEXT:    store <vscale x 8 x i64> [[INTERLEAVED_VEC12]], ptr [[Q0]], align 8
-; CHECK-NEXT:    [[NEXTI]] = add nuw i64 [[I]], [[TMP2]]
-; CHECK-NEXT:    [[TMP28:%.*]] = icmp eq i64 [[NEXTI]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP1]]
+; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <16 x i64>, ptr [[TMP2]], align 8
+; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 0, i32 8>
+; CHECK-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 1, i32 9>
+; CHECK-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 2, i32 10>
+; CHECK-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 3, i32 11>
+; CHECK-NEXT:    [[STRIDED_VEC4:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 4, i32 12>
+; CHECK-NEXT:    [[STRIDED_VEC5:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 5, i32 13>
+; CHECK-NEXT:    [[STRIDED_VEC6:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 6, i32 14>
+; CHECK-NEXT:    [[STRIDED_VEC7:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 7, i32 15>
+; CHECK-NEXT:    [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1)
+; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; CHECK-NEXT:    [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3)
+; CHECK-NEXT:    [[TMP10:%.*]] = add <2 x i64> [[STRIDED_VEC3]], splat (i64 4)
+; CHECK-NEXT:    [[TMP12:%.*]] = add <2 x i64> [[STRIDED_VEC4]], splat (i64 5)
+; CHECK-NEXT:    [[TMP14:%.*]] = add <2 x i64> [[STRIDED_VEC5]], splat (i64 6)
+; CHECK-NEXT:    [[TMP16:%.*]] = add <2 x i64> [[STRIDED_VEC6]], splat (i64 7)
+; CHECK-NEXT:    [[TMP19:%.*]] = add <2 x i64> [[STRIDED_VEC7]], splat (i64 8)
+; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <2 x i64> [[TMP16]], <2 x i64> [[TMP19]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <4 x i64> [[TMP21]], <4 x i64> [[TMP22]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <4 x i64> [[TMP23]], <4 x i64> [[TMP24]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP27:%.*]] = shufflevector <8 x i64> [[TMP25]], <8 x i64> [[TMP26]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i64> [[TMP27]], <16 x i64> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; CHECK-NEXT:    store <16 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-NEXT:    br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
-; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; CHECK-NEXT:    br label [[LOOP1:%.*]]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; CHECK-NEXT:    [[OFFSET8:%.*]] = shl i64 [[I1]], 3
-; CHECK-NEXT:    [[Q8:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET8]]
-; CHECK-NEXT:    [[X0:%.*]] = load i64, ptr [[Q8]], align 8
+; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 3
+; CHECK-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; CHECK-NEXT:    [[X0:%.*]] = load i64, ptr [[Q0]], align 8
 ; CHECK-NEXT:    [[Y0:%.*]] = add i64 [[X0]], 1
-; CHECK-NEXT:    store i64 [[Y0]], ptr [[Q8]], align 8
-; CHECK-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET8]], 1
+; CHECK-NEXT:    store i64 [[Y0]], ptr [[Q0]], align 8
+; CHECK-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
 ; CHECK-NEXT:    [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
 ; CHECK-NEXT:    [[X1:%.*]] = load i64, ptr [[Q1]], align 8
 ; CHECK-NEXT:    [[Y1:%.*]] = add i64 [[X1]], 2
@@ -843,23 +824,23 @@ define void @load_store_factor8(ptr %p) {
 ; CHECK-NEXT:    [[X7:%.*]] = load i64, ptr [[Q7]], align 8
 ; CHECK-NEXT:    [[Y7:%.*]] = add i64 [[X7]], 8
 ; CHECK-NEXT:    store i64 [[Y7]], ptr [[Q7]], align 8
-; CHECK-NEXT:    [[NEXTI1]] = add i64 [[I1]], 1
-; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; CHECK-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; CHECK-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;
 ; FIXED-LABEL: @load_store_factor8(
 ; FIXED-NEXT:  entry:
-; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; FIXED:       vector.ph:
-; FIXED-NEXT:    br label [[LOOP:%.*]]
+; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; FIXED:       vector.body:
-; FIXED-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[I]], 0
-; FIXED-NEXT:    [[OFFSET0:%.*]] = shl i64 [[TMP0]], 3
-; FIXED-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]]
-; FIXED-NEXT:    [[WIDE_VEC:%.*]] = load <16 x i64>, ptr [[Q0]], align 8
+; FIXED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; FIXED-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 3
+; FIXED-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP1]]
+; FIXED-NEXT:    [[WIDE_VEC:%.*]] = load <16 x i64>, ptr [[TMP2]], align 8
 ; FIXED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 0, i32 8>
 ; FIXED-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 1, i32 9>
 ; FIXED-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 2, i32 10>
@@ -868,39 +849,39 @@ define void @load_store_factor8(ptr %p) {
 ; FIXED-NEXT:    [[STRIDED_VEC5:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 5, i32 13>
 ; FIXED-NEXT:    [[STRIDED_VEC6:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 6, i32 14>
 ; FIXED-NEXT:    [[STRIDED_VEC7:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 7, i32 15>
-; FIXED-NEXT:    [[TMP3:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1)
-; FIXED-NEXT:    [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2)
-; FIXED-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3)
-; FIXED-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC3]], splat (i64 4)
-; FIXED-NEXT:    [[TMP7:%.*]] = add <2 x i64> [[STRIDED_VEC4]], splat (i64 5)
-; FIXED-NEXT:    [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC5]], splat (i64 6)
-; FIXED-NEXT:    [[TMP9:%.*]] = add <2 x i64> [[STRIDED_VEC6]], splat (i64 7)
-; FIXED-NEXT:    [[TMP10:%.*]] = add <2 x i64> [[STRIDED_VEC7]], splat (i64 8)
-; FIXED-NEXT:    [[TMP11:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; FIXED-NEXT:    [[TMP12:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; FIXED-NEXT:    [[TMP13:%.*]] = shufflevector <2 x i64> [[TMP7]], <2 x i64> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; FIXED-NEXT:    [[TMP14:%.*]] = shufflevector <2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; FIXED-NEXT:    [[TMP15:%.*]] = shufflevector <4 x i64> [[TMP11]], <4 x i64> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; FIXED-NEXT:    [[TMP16:%.*]] = shufflevector <4 x i64> [[TMP13]], <4 x i64> [[TMP14]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; FIXED-NEXT:    [[TMP17:%.*]] = shufflevector <8 x i64> [[TMP15]], <8 x i64> [[TMP16]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; FIXED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i64> [[TMP17]], <16 x i64> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; FIXED-NEXT:    store <16 x i64> [[INTERLEAVED_VEC]], ptr [[Q0]], align 8
-; FIXED-NEXT:    [[NEXTI]] = add nuw i64 [[I]], 2
-; FIXED-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; FIXED-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]
+; FIXED-NEXT:    [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1)
+; FIXED-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; FIXED-NEXT:    [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3)
+; FIXED-NEXT:    [[TMP10:%.*]] = add <2 x i64> [[STRIDED_VEC3]], splat (i64 4)
+; FIXED-NEXT:    [[TMP12:%.*]] = add <2 x i64> [[STRIDED_VEC4]], splat (i64 5)
+; FIXED-NEXT:    [[TMP14:%.*]] = add <2 x i64> [[STRIDED_VEC5]], splat (i64 6)
+; FIXED-NEXT:    [[TMP16:%.*]] = add <2 x i64> [[STRIDED_VEC6]], splat (i64 7)
+; FIXED-NEXT:    [[TMP19:%.*]] = add <2 x i64> [[STRIDED_VEC7]], splat (i64 8)
+; FIXED-NEXT:    [[TMP21:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; FIXED-NEXT:    [[TMP22:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; FIXED-NEXT:    [[TMP23:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; FIXED-NEXT:    [[TMP24:%.*]] = shufflevector <2 x i64> [[TMP16]], <2 x i64> [[TMP19]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; FIXED-NEXT:    [[TMP25:%.*]] = shufflevector <4 x i64> [[TMP21]], <4 x i64> [[TMP22]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; FIXED-NEXT:    [[TMP26:%.*]] = shufflevector <4 x i64> [[TMP23]], <4 x i64> [[TMP24]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; FIXED-NEXT:    [[TMP27:%.*]] = shufflevector <8 x i64> [[TMP25]], <8 x i64> [[TMP26]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; FIXED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i64> [[TMP27]], <16 x i64> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; FIXED-NEXT:    store <16 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
+; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; FIXED-NEXT:    [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; FIXED-NEXT:    br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; FIXED:       middle.block:
 ; FIXED-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; FIXED:       scalar.ph:
-; FIXED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; FIXED-NEXT:    br label [[LOOP1:%.*]]
+; FIXED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; FIXED-NEXT:    br label [[LOOP:%.*]]
 ; FIXED:       loop:
-; FIXED-NEXT:    [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; FIXED-NEXT:    [[OFFSET8:%.*]] = shl i64 [[I1]], 3
-; FIXED-NEXT:    [[Q8:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET8]]
-; FIXED-NEXT:    [[X0:%.*]] = load i64, ptr [[Q8]], align 8
+; FIXED-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; FIXED-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 3
+; FIXED-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; FIXED-NEXT:    [[X0:%.*]] = load i64, ptr [[Q0]], align 8
 ; FIXED-NEXT:    [[Y0:%.*]] = add i64 [[X0]], 1
-; FIXED-NEXT:    store i64 [[Y0]], ptr [[Q8]], align 8
-; FIXED-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET8]], 1
+; FIXED-NEXT:    store i64 [[Y0]], ptr [[Q0]], align 8
+; FIXED-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
 ; FIXED-NEXT:    [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
 ; FIXED-NEXT:    [[X1:%.*]] = load i64, ptr [[Q1]], align 8
 ; FIXED-NEXT:    [[Y1:%.*]] = add i64 [[X1]], 2
@@ -935,83 +916,64 @@ define void @load_store_factor8(ptr %p) {
 ; FIXED-NEXT:    [[X7:%.*]] = load i64, ptr [[Q7]], align 8
 ; FIXED-NEXT:    [[Y7:%.*]] = add i64 [[X7]], 8
 ; FIXED-NEXT:    store i64 [[Y7]], ptr [[Q7]], align 8
-; FIXED-NEXT:    [[NEXTI1]] = add i64 [[I1]], 1
-; FIXED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; FIXED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP11:![0-9]+]]
+; FIXED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; FIXED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; FIXED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
 ; FIXED:       exit:
 ; FIXED-NEXT:    ret void
 ;
 ; SCALABLE-LABEL: @load_store_factor8(
 ; SCALABLE-NEXT:  entry:
-; SCALABLE-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
-; SCALABLE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; SCALABLE-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; SCALABLE:       vector.ph:
-; SCALABLE-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
-; SCALABLE-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; SCALABLE-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT:    br label [[LOOP:%.*]]
+; SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; SCALABLE:       vector.body:
-; SCALABLE-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; SCALABLE-NEXT:    [[TMP3:%.*]] = add i64 [[I]], 0
-; SCALABLE-NEXT:    [[OFFSET0:%.*]] = shl i64 [[TMP3]], 3
-; SCALABLE-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]]
-; SCALABLE-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 8 x i64>, ptr [[Q0]], align 8
-; SCALABLE-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i64>, <vscale x 4 x i64> } @llvm.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> [[WIDE_VEC]])
-; SCALABLE-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } [[STRIDED_VEC]], 0
-; SCALABLE-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } [[STRIDED_VEC]], 1
-; SCALABLE-NEXT:    [[STRIDED_VEC1:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[TMP6]])
-; SCALABLE-NEXT:    [[STRIDED_VEC2:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[TMP7]])
-; SCALABLE-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC1]], 0
-; SCALABLE-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC2]], 0
-; SCALABLE-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC1]], 1
-; SCALABLE-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC2]], 1
-; SCALABLE-NEXT:    [[STRIDED_VEC3:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP8]])
-; SCALABLE-NEXT:    [[STRIDED_VEC4:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP9]])
-; SCALABLE-NEXT:    [[STRIDED_VEC5:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP10]])
-; SCALABLE-NEXT:    [[STRIDED_VEC6:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP11]])
-; SCALABLE-NEXT:    [[TMP12:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC3]], 0
-; SCALABLE-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC4]], 0
-; SCALABLE-NEXT:    [[TMP14:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC5]], 0
-; SCALABLE-NEXT:    [[TMP15:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC6]], 0
-; SCALABLE-NEXT:    [[TMP16:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC3]], 1
-; SCALABLE-NEXT:    [[TMP17:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC4]], 1
-; SCALABLE-NEXT:    [[TMP18:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC5]], 1
-; SCALABLE-NEXT:    [[TMP19:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC6]], 1
-; SCALABLE-NEXT:    [[TMP20:%.*]] = add <vscale x 1 x i64> [[TMP12]], splat (i64 1)
-; SCALABLE-NEXT:    [[TMP21:%.*]] = add <vscale x 1 x i64> [[TMP13]], splat (i64 2)
-; SCALABLE-NEXT:    [[TMP22:%.*]] = add <vscale x 1 x i64> [[TMP14]], splat (i64 3)
-; SCALABLE-NEXT:    [[TMP23:%.*]] = add <vscale x 1 x i64> [[TMP15]], splat (i64 4)
-; SCALABLE-NEXT:    [[TMP24:%.*]] = add <vscale x 1 x i64> [[TMP16]], splat (i64 5)
-; SCALABLE-NEXT:    [[TMP25:%.*]] = add <vscale x 1 x i64> [[TMP17]], splat (i64 6)
-; SCALABLE-NEXT:    [[TMP26:%.*]] = add <vscale x 1 x i64> [[TMP18]], splat (i64 7)
-; SCALABLE-NEXT:    [[TMP27:%.*]] = add <vscale x 1 x i64> [[TMP19]], splat (i64 8)
-; SCALABLE-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 2 x i64> @llvm.vector.interleave2.nxv2i64(<vscale x 1 x i64> [[TMP20]], <vscale x 1 x i64> [[TMP24]])
-; SCALABLE-NEXT:    [[INTERLEAVED_VEC7:%.*]] = call <vscale x 2 x i64> @llvm.vector.interleave2.nxv2i64(<vscale x 1 x i64> [[TMP21]], <vscale x 1 x i64> [[TMP25]])
-; SCALABLE-NEXT:    [[INTERLEAVED_VEC8:%.*]] = call <vscale x 2 x i64> @llvm.vector.interleave2.nxv2i64(<vscale x 1 x i64> [[TMP22]], <vscale x 1 x i64> [[TMP26]])
-; SCALABLE-NEXT:    [[INTERLEAVED_VEC9:%.*]] = call <vscale x 2 x i64> @llvm.vector.interleave2.nxv2i64(<vscale x 1 x i64> [[TMP23]], <vscale x 1 x i64> [[TMP27]])
-; SCALABLE-NEXT:    [[INTERLEAVED_VEC10:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[INTERLEAVED_VEC]], <vscale x 2 x i64> [[INTERLEAVED_VEC8]])
-; SCALABLE-NEXT:    [[INTERLEAVED_VEC11:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[INTERLEAVED_VEC7]], <vscale x 2 x i64> [[INTERLEAVED_VEC9]])
-; SCALABLE-NEXT:    [[INTERLEAVED_VEC12:%.*]] = call <vscale x 8 x i64> @llvm.vector.interleave2.nxv8i64(<vscale x 4 x i64> [[INTERLEAVED_VEC10]], <vscale x 4 x i64> [[INTERLEAVED_VEC11]])
-; SCALABLE-NEXT:    store <vscale x 8 x i64> [[INTERLEAVED_VEC12]], ptr [[Q0]], align 8
-; SCALABLE-NEXT:    [[NEXTI]] = add nuw i64 [[I]], [[TMP2]]
-; SCALABLE-NEXT:    [[TMP28:%.*]] = icmp eq i64 [[NEXTI]], [[N_VEC]]
-; SCALABLE-NEXT:    br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]
+; SCALABLE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; SCALABLE-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 3
+; SCALABLE-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP1]]
+; SCALABLE-NEXT:    [[WIDE_VEC:%.*]] = load <16 x i64>, ptr [[TMP2]], align 8
+; SCALABLE-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 0, i32 8>
+; SCALABLE-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 1, i32 9>
+; SCALABLE-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 2, i32 10>
+; SCALABLE-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 3, i32 11>
+; SCALABLE-NEXT:    [[STRIDED_VEC4:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 4, i32 12>
+; SCALABLE-NEXT:    [[STRIDED_VEC5:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 5, i32 13>
+; SCALABLE-NEXT:    [[STRIDED_VEC6:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 6, i32 14>
+; SCALABLE-NEXT:    [[STRIDED_VEC7:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 7, i32 15>
+; SCALABLE-NEXT:    [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1)
+; SCALABLE-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; SCALABLE-NEXT:    [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3)
+; SCALABLE-NEXT:    [[TMP10:%.*]] = add <2 x i64> [[STRIDED_VEC3]], splat (i64 4)
+; SCALABLE-NEXT:    [[TMP12:%.*]] = add <2 x i64> [[STRIDED_VEC4]], splat (i64 5)
+; SCALABLE-NEXT:    [[TMP14:%.*]] = add <2 x i64> [[STRIDED_VEC5]], splat (i64 6)
+; SCALABLE-NEXT:    [[TMP16:%.*]] = add <2 x i64> [[STRIDED_VEC6]], splat (i64 7)
+; SCALABLE-NEXT:    [[TMP19:%.*]] = add <2 x i64> [[STRIDED_VEC7]], splat (i64 8)
+; SCALABLE-NEXT:    [[TMP21:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SCALABLE-NEXT:    [[TMP22:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SCALABLE-NEXT:    [[TMP23:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SCALABLE-NEXT:    [[TMP24:%.*]] = shufflevector <2 x i64> [[TMP16]], <2 x i64> [[TMP19]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SCALABLE-NEXT:    [[TMP25:%.*]] = shufflevector <4 x i64> [[TMP21]], <4 x i64> [[TMP22]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SCALABLE-NEXT:    [[TMP26:%.*]] = shufflevector <4 x i64> [[TMP23]], <4 x i64> [[TMP24]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SCALABLE-NEXT:    [[TMP27:%.*]] = shufflevector <8 x i64> [[TMP25]], <8 x i64> [[TMP26]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SCALABLE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i64> [[TMP27]], <16 x i64> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; SCALABLE-NEXT:    store <16 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
+; SCALABLE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; SCALABLE-NEXT:    [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; SCALABLE-NEXT:    br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; SCALABLE:       middle.block:
-; SCALABLE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
-; SCALABLE-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; SCALABLE-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; SCALABLE:       scalar.ph:
-; SCALABLE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; SCALABLE-NEXT:    br label [[LOOP1:%.*]]
+; SCALABLE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; SCALABLE-NEXT:    br label [[LOOP:%.*]]
 ; SCALABLE:       loop:
-; SCALABLE-NEXT:    [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; SCALABLE-NEXT:    [[OFFSET8:%.*]] = shl i64 [[I1]], 3
-; SCALABLE-NEXT:    [[Q8:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET8]]
-; SCALABLE-NEXT:    [[X0:%.*]] = load i64, ptr [[Q8]], align 8
+; SCALABLE-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; SCALABLE-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 3
+; SCALABLE-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; SCALABLE-NEXT:    [[X0:%.*]] = load i64, ptr [[Q0]], align 8
 ; SCALABLE-NEXT:    [[Y0:%.*]] = add i64 [[X0]], 1
-; SCALABLE-NEXT:    store i64 [[Y0]], ptr [[Q8]], align 8
-; SCALABLE-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET8]], 1
+; SCALABLE-NEXT:    store i64 [[Y0]], ptr [[Q0]], align 8
+; SCALABLE-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
 ; SCALABLE-NEXT:    [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
 ; SCALABLE-NEXT:    [[X1:%.*]] = load i64, ptr [[Q1]], align 8
 ; SCALABLE-NEXT:    [[Y1:%.*]] = add i64 [[X1]], 2
@@ -1046,9 +1008,9 @@ define void @load_store_factor8(ptr %p) {
 ; SCALABLE-NEXT:    [[X7:%.*]] = load i64, ptr [[Q7]], align 8
 ; SCALABLE-NEXT:    [[Y7:%.*]] = add i64 [[X7]], 8
 ; SCALABLE-NEXT:    store i64 [[Y7]], ptr [[Q7]], align 8
-; SCALABLE-NEXT:    [[NEXTI1]] = add i64 [[I1]], 1
-; SCALABLE-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; SCALABLE-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP11:![0-9]+]]
+; SCALABLE-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; SCALABLE-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; SCALABLE-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
 ; SCALABLE:       exit:
 ; SCALABLE-NEXT:    ret void
 ;
@@ -1118,7 +1080,7 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
@@ -1126,94 +1088,94 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) {
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[I]], 0
-; CHECK-NEXT:    [[OFFSET0:%.*]] = shl i64 [[TMP6]], 1
-; CHECK-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET0]]
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[Q0]], align 4
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP7:%.*]] = shl i64 [[TMP6]], 1
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP8]], align 4
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
-; CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
-; CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
-; CHECK-NEXT:    [[TMP11:%.*]] = add <vscale x 4 x i32> [[TMP9]], [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i32, ptr [[Q:%.*]], i64 [[TMP6]]
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0
-; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP11]], ptr [[TMP13]], align 4
-; CHECK-NEXT:    [[NEXTI]] = add nuw i64 [[I]], [[TMP5]]
-; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[NEXTI]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
+; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
+; CHECK-NEXT:    [[TMP12:%.*]] = add <vscale x 4 x i32> [[TMP10]], [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i32, ptr [[Q:%.*]], i64 [[TMP6]]
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0
+; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP12]], ptr [[TMP14]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; CHECK-NEXT:    br label [[LOOP1:%.*]]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; CHECK-NEXT:    [[OFFSET2:%.*]] = shl i64 [[I1]], 1
-; CHECK-NEXT:    [[Q2:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET2]]
-; CHECK-NEXT:    [[X0:%.*]] = load i32, ptr [[Q2]], align 4
-; CHECK-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET2]], 1
+; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; CHECK-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
+; CHECK-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
+; CHECK-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
 ; CHECK-NEXT:    [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
 ; CHECK-NEXT:    [[X1:%.*]] = load i32, ptr [[Q1]], align 4
 ; CHECK-NEXT:    [[RES:%.*]] = add i32 [[X0]], [[X1]]
-; CHECK-NEXT:    [[DST:%.*]] = getelementptr i32, ptr [[Q]], i64 [[I1]]
+; CHECK-NEXT:    [[DST:%.*]] = getelementptr i32, ptr [[Q]], i64 [[I]]
 ; CHECK-NEXT:    store i32 [[RES]], ptr [[DST]], align 4
-; CHECK-NEXT:    [[NEXTI1]] = add i64 [[I1]], 1
-; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; CHECK-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; CHECK-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;
 ; FIXED-LABEL: @combine_load_factor2_i32(
 ; FIXED-NEXT:  entry:
-; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; FIXED:       vector.ph:
-; FIXED-NEXT:    br label [[LOOP:%.*]]
+; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; FIXED:       vector.body:
-; FIXED-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[I]], 0
-; FIXED-NEXT:    [[TMP1:%.*]] = add i64 [[I]], 8
-; FIXED-NEXT:    [[OFFSET0:%.*]] = shl i64 [[TMP0]], 1
+; FIXED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; FIXED-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 8
+; FIXED-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0]], 1
 ; FIXED-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP1]], 1
-; FIXED-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET0]]
+; FIXED-NEXT:    [[TMP4:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP2]]
 ; FIXED-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP3]]
-; FIXED-NEXT:    [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[Q0]], align 4
+; FIXED-NEXT:    [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP4]], align 4
 ; FIXED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; FIXED-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; FIXED-NEXT:    [[WIDE_VEC2:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4
-; FIXED-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <16 x i32> [[WIDE_VEC2]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; FIXED-NEXT:    [[STRIDED_VEC4:%.*]] = shufflevector <16 x i32> [[WIDE_VEC2]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; FIXED-NEXT:    [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC]], [[STRIDED_VEC1]]
-; FIXED-NEXT:    [[TMP7:%.*]] = add <8 x i32> [[STRIDED_VEC3]], [[STRIDED_VEC4]]
-; FIXED-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[Q:%.*]], i64 [[TMP0]]
-; FIXED-NEXT:    [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0
-; FIXED-NEXT:    [[TMP10:%.*]] = getelementptr i32, ptr [[TMP8]], i32 8
-; FIXED-NEXT:    store <8 x i32> [[TMP6]], ptr [[TMP9]], align 4
-; FIXED-NEXT:    store <8 x i32> [[TMP7]], ptr [[TMP10]], align 4
-; FIXED-NEXT:    [[NEXTI]] = add nuw i64 [[I]], 16
-; FIXED-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; FIXED-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP12:![0-9]+]]
+; FIXED-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; FIXED-NEXT:    [[WIDE_VEC1:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4
+; FIXED-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <16 x i32> [[WIDE_VEC1]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; FIXED-NEXT:    [[STRIDED_VEC4:%.*]] = shufflevector <16 x i32> [[WIDE_VEC1]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; FIXED-NEXT:    [[TMP8:%.*]] = add <8 x i32> [[STRIDED_VEC]], [[STRIDED_VEC3]]
+; FIXED-NEXT:    [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], [[STRIDED_VEC4]]
+; FIXED-NEXT:    [[TMP10:%.*]] = getelementptr i32, ptr [[Q:%.*]], i64 [[TMP0]]
+; FIXED-NEXT:    [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0
+; FIXED-NEXT:    [[TMP13:%.*]] = getelementptr i32, ptr [[TMP10]], i32 8
+; FIXED-NEXT:    store <8 x i32> [[TMP8]], ptr [[TMP12]], align 4
+; FIXED-NEXT:    store <8 x i32> [[TMP9]], ptr [[TMP13]], align 4
+; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; FIXED-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; FIXED-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; FIXED:       middle.block:
 ; FIXED-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; FIXED:       scalar.ph:
-; FIXED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; FIXED-NEXT:    br label [[LOOP1:%.*]]
+; FIXED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; FIXED-NEXT:    br label [[LOOP:%.*]]
 ; FIXED:       loop:
-; FIXED-NEXT:    [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; FIXED-NEXT:    [[OFFSET2:%.*]] = shl i64 [[I1]], 1
-; FIXED-NEXT:    [[Q2:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET2]]
-; FIXED-NEXT:    [[X0:%.*]] = load i32, ptr [[Q2]], align 4
-; FIXED-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET2]], 1
+; FIXED-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; FIXED-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; FIXED-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
+; FIXED-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
+; FIXED-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
 ; FIXED-NEXT:    [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
 ; FIXED-NEXT:    [[X1:%.*]] = load i32, ptr [[Q1]], align 4
 ; FIXED-NEXT:    [[RES:%.*]] = add i32 [[X0]], [[X1]]
-; FIXED-NEXT:    [[DST:%.*]] = getelementptr i32, ptr [[Q]], i64 [[I1]]
+; FIXED-NEXT:    [[DST:%.*]] = getelementptr i32, ptr [[Q]], i64 [[I]]
 ; FIXED-NEXT:    store i32 [[RES]], ptr [[DST]], align 4
-; FIXED-NEXT:    [[NEXTI1]] = add i64 [[I1]], 1
-; FIXED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; FIXED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP13:![0-9]+]]
+; FIXED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; FIXED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; FIXED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
 ; FIXED:       exit:
 ; FIXED-NEXT:    ret void
 ;
@@ -1222,7 +1184,7 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) {
 ; SCALABLE-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
 ; SCALABLE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
-; SCALABLE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; SCALABLE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; SCALABLE:       vector.ph:
 ; SCALABLE-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
@@ -1230,43 +1192,43 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) {
 ; SCALABLE-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
 ; SCALABLE-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; SCALABLE-NEXT:    br label [[LOOP:%.*]]
+; SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; SCALABLE:       vector.body:
-; SCALABLE-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; SCALABLE-NEXT:    [[TMP6:%.*]] = add i64 [[I]], 0
-; SCALABLE-NEXT:    [[OFFSET0:%.*]] = shl i64 [[TMP6]], 1
-; SCALABLE-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET0]]
-; SCALABLE-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[Q0]], align 4
+; SCALABLE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
+; SCALABLE-NEXT:    [[TMP7:%.*]] = shl i64 [[TMP6]], 1
+; SCALABLE-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]]
+; SCALABLE-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP8]], align 4
 ; SCALABLE-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
-; SCALABLE-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
-; SCALABLE-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
-; SCALABLE-NEXT:    [[TMP11:%.*]] = add <vscale x 4 x i32> [[TMP9]], [[TMP10]]
-; SCALABLE-NEXT:    [[TMP12:%.*]] = getelementptr i32, ptr [[Q:%.*]], i64 [[TMP6]]
-; SCALABLE-NEXT:    [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0
-; SCALABLE-NEXT:    store <vscale x 4 x i32> [[TMP11]], ptr [[TMP13]], align 4
-; SCALABLE-NEXT:    [[NEXTI]] = add nuw i64 [[I]], [[TMP5]]
-; SCALABLE-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[NEXTI]], [[N_VEC]]
-; SCALABLE-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP12:![0-9]+]]
+; SCALABLE-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
+; SCALABLE-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
+; SCALABLE-NEXT:    [[TMP12:%.*]] = add <vscale x 4 x i32> [[TMP10]], [[TMP11]]
+; SCALABLE-NEXT:    [[TMP13:%.*]] = getelementptr i32, ptr [[Q:%.*]], i64 [[TMP6]]
+; SCALABLE-NEXT:    [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0
+; SCALABLE-NEXT:    store <vscale x 4 x i32> [[TMP12]], ptr [[TMP14]], align 4
+; SCALABLE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; SCALABLE-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; SCALABLE-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; SCALABLE:       middle.block:
 ; SCALABLE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
 ; SCALABLE-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; SCALABLE:       scalar.ph:
-; SCALABLE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; SCALABLE-NEXT:    br label [[LOOP1:%.*]]
+; SCALABLE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; SCALABLE-NEXT:    br label [[LOOP:%.*]]
 ; SCALABLE:       loop:
-; SCALABLE-NEXT:    [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; SCALABLE-NEXT:    [[OFFSET2:%.*]] = shl i64 [[I1]], 1
-; SCALABLE-NEXT:    [[Q2:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET2]]
-; SCALABLE-NEXT:    [[X0:%.*]] = load i32, ptr [[Q2]], align 4
-; SCALABLE-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET2]], 1
+; SCALABLE-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; SCALABLE-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; SCALABLE-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
+; SCALABLE-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
+; SCALABLE-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
 ; SCALABLE-NEXT:    [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
 ; SCALABLE-NEXT:    [[X1:%.*]] = load i32, ptr [[Q1]], align 4
 ; SCALABLE-NEXT:    [[RES:%.*]] = add i32 [[X0]], [[X1]]
-; SCALABLE-NEXT:    [[DST:%.*]] = getelementptr i32, ptr [[Q]], i64 [[I1]]
+; SCALABLE-NEXT:    [[DST:%.*]] = getelementptr i32, ptr [[Q]], i64 [[I]]
 ; SCALABLE-NEXT:    store i32 [[RES]], ptr [[DST]], align 4
-; SCALABLE-NEXT:    [[NEXTI1]] = add i64 [[I1]], 1
-; SCALABLE-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; SCALABLE-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP13:![0-9]+]]
+; SCALABLE-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; SCALABLE-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; SCALABLE-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
 ; SCALABLE:       exit:
 ; SCALABLE-NEXT:    ret void
 ;
@@ -1301,7 +1263,7 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
@@ -1309,94 +1271,94 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) {
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 2
-; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[I]], 0
-; CHECK-NEXT:    [[OFFSET0:%.*]] = shl i64 [[TMP6]], 1
-; CHECK-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]]
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 4 x i64>, ptr [[Q0]], align 8
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP7:%.*]] = shl i64 [[TMP6]], 1
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 4 x i64>, ptr [[TMP8]], align 8
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[WIDE_VEC]])
-; CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0
-; CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1
-; CHECK-NEXT:    [[TMP11:%.*]] = add <vscale x 2 x i64> [[TMP9]], [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[TMP6]]
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i64, ptr [[TMP12]], i32 0
-; CHECK-NEXT:    store <vscale x 2 x i64> [[TMP11]], ptr [[TMP13]], align 8
-; CHECK-NEXT:    [[NEXTI]] = add nuw i64 [[I]], [[TMP5]]
-; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[NEXTI]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0
+; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1
+; CHECK-NEXT:    [[TMP12:%.*]] = add <vscale x 2 x i64> [[TMP10]], [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[TMP6]]
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i64, ptr [[TMP13]], i32 0
+; CHECK-NEXT:    store <vscale x 2 x i64> [[TMP12]], ptr [[TMP14]], align 8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; CHECK-NEXT:    br label [[LOOP1:%.*]]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; CHECK-NEXT:    [[OFFSET2:%.*]] = shl i64 [[I1]], 1
-; CHECK-NEXT:    [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
-; CHECK-NEXT:    [[X0:%.*]] = load i64, ptr [[Q2]], align 8
-; CHECK-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET2]], 1
+; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; CHECK-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; CHECK-NEXT:    [[X0:%.*]] = load i64, ptr [[Q0]], align 8
+; CHECK-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
 ; CHECK-NEXT:    [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
 ; CHECK-NEXT:    [[X1:%.*]] = load i64, ptr [[Q1]], align 8
 ; CHECK-NEXT:    [[RES:%.*]] = add i64 [[X0]], [[X1]]
-; CHECK-NEXT:    [[DST:%.*]] = getelementptr i64, ptr [[Q]], i64 [[I1]]
+; CHECK-NEXT:    [[DST:%.*]] = getelementptr i64, ptr [[Q]], i64 [[I]]
 ; CHECK-NEXT:    store i64 [[RES]], ptr [[DST]], align 8
-; CHECK-NEXT:    [[NEXTI1]] = add i64 [[I1]], 1
-; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; CHECK-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; CHECK-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;
 ; FIXED-LABEL: @combine_load_factor2_i64(
 ; FIXED-NEXT:  entry:
-; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; FIXED:       vector.ph:
-; FIXED-NEXT:    br label [[LOOP:%.*]]
+; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; FIXED:       vector.body:
-; FIXED-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[I]], 0
-; FIXED-NEXT:    [[TMP1:%.*]] = add i64 [[I]], 4
-; FIXED-NEXT:    [[OFFSET0:%.*]] = shl i64 [[TMP0]], 1
+; FIXED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; FIXED-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
+; FIXED-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0]], 1
 ; FIXED-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP1]], 1
-; FIXED-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]]
+; FIXED-NEXT:    [[TMP4:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP2]]
 ; FIXED-NEXT:    [[TMP5:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP3]]
-; FIXED-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[Q0]], align 8
+; FIXED-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP4]], align 8
 ; FIXED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; FIXED-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; FIXED-NEXT:    [[WIDE_VEC2:%.*]] = load <8 x i64>, ptr [[TMP5]], align 8
-; FIXED-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <8 x i64> [[WIDE_VEC2]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; FIXED-NEXT:    [[STRIDED_VEC4:%.*]] = shufflevector <8 x i64> [[WIDE_VEC2]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; FIXED-NEXT:    [[TMP6:%.*]] = add <4 x i64> [[STRIDED_VEC]], [[STRIDED_VEC1]]
-; FIXED-NEXT:    [[TMP7:%.*]] = add <4 x i64> [[STRIDED_VEC3]], [[STRIDED_VEC4]]
-; FIXED-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[TMP0]]
-; FIXED-NEXT:    [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0
-; FIXED-NEXT:    [[TMP10:%.*]] = getelementptr i64, ptr [[TMP8]], i32 4
-; FIXED-NEXT:    store <4 x i64> [[TMP6]], ptr [[TMP9]], align 8
-; FIXED-NEXT:    store <4 x i64> [[TMP7]], ptr [[TMP10]], align 8
-; FIXED-NEXT:    [[NEXTI]] = add nuw i64 [[I]], 8
-; FIXED-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; FIXED-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP14:![0-9]+]]
+; FIXED-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; FIXED-NEXT:    [[WIDE_VEC1:%.*]] = load <8 x i64>, ptr [[TMP5]], align 8
+; FIXED-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <8 x i64> [[WIDE_VEC1]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; FIXED-NEXT:    [[STRIDED_VEC4:%.*]] = shufflevector <8 x i64> [[WIDE_VEC1]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; FIXED-NEXT:    [[TMP8:%.*]] = add <4 x i64> [[STRIDED_VEC]], [[STRIDED_VEC3]]
+; FIXED-NEXT:    [[TMP9:%.*]] = add <4 x i64> [[STRIDED_VEC2]], [[STRIDED_VEC4]]
+; FIXED-NEXT:    [[TMP10:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[TMP0]]
+; FIXED-NEXT:    [[TMP12:%.*]] = getelementptr i64, ptr [[TMP10]], i32 0
+; FIXED-NEXT:    [[TMP13:%.*]] = getelementptr i64, ptr [[TMP10]], i32 4
+; FIXED-NEXT:    store <4 x i64> [[TMP8]], ptr [[TMP12]], align 8
+; FIXED-NEXT:    store <4 x i64> [[TMP9]], ptr [[TMP13]], align 8
+; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; FIXED-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; FIXED-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; FIXED:       middle.block:
 ; FIXED-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; FIXED:       scalar.ph:
-; FIXED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; FIXED-NEXT:    br label [[LOOP1:%.*]]
+; FIXED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; FIXED-NEXT:    br label [[LOOP:%.*]]
 ; FIXED:       loop:
-; FIXED-NEXT:    [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; FIXED-NEXT:    [[OFFSET2:%.*]] = shl i64 [[I1]], 1
-; FIXED-NEXT:    [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
-; FIXED-NEXT:    [[X0:%.*]] = load i64, ptr [[Q2]], align 8
-; FIXED-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET2]], 1
+; FIXED-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; FIXED-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; FIXED-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; FIXED-NEXT:    [[X0:%.*]] = load i64, ptr [[Q0]], align 8
+; FIXED-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
 ; FIXED-NEXT:    [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
 ; FIXED-NEXT:    [[X1:%.*]] = load i64, ptr [[Q1]], align 8
 ; FIXED-NEXT:    [[RES:%.*]] = add i64 [[X0]], [[X1]]
-; FIXED-NEXT:    [[DST:%.*]] = getelementptr i64, ptr [[Q]], i64 [[I1]]
+; FIXED-NEXT:    [[DST:%.*]] = getelementptr i64, ptr [[Q]], i64 [[I]]
 ; FIXED-NEXT:    store i64 [[RES]], ptr [[DST]], align 8
-; FIXED-NEXT:    [[NEXTI1]] = add i64 [[I1]], 1
-; FIXED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; FIXED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP15:![0-9]+]]
+; FIXED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; FIXED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; FIXED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
 ; FIXED:       exit:
 ; FIXED-NEXT:    ret void
 ;
@@ -1405,7 +1367,7 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) {
 ; SCALABLE-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
 ; SCALABLE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
-; SCALABLE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; SCALABLE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; SCALABLE:       vector.ph:
 ; SCALABLE-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
@@ -1413,43 +1375,43 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) {
 ; SCALABLE-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
 ; SCALABLE-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 2
-; SCALABLE-NEXT:    br label [[LOOP:%.*]]
+; SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; SCALABLE:       vector.body:
-; SCALABLE-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; SCALABLE-NEXT:    [[TMP6:%.*]] = add i64 [[I]], 0
-; SCALABLE-NEXT:    [[OFFSET0:%.*]] = shl i64 [[TMP6]], 1
-; SCALABLE-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]]
-; SCALABLE-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 4 x i64>, ptr [[Q0]], align 8
+; SCALABLE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
+; SCALABLE-NEXT:    [[TMP7:%.*]] = shl i64 [[TMP6]], 1
+; SCALABLE-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP7]]
+; SCALABLE-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 4 x i64>, ptr [[TMP8]], align 8
 ; SCALABLE-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[WIDE_VEC]])
-; SCALABLE-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0
-; SCALABLE-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1
-; SCALABLE-NEXT:    [[TMP11:%.*]] = add <vscale x 2 x i64> [[TMP9]], [[TMP10]]
-; SCALABLE-NEXT:    [[TMP12:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[TMP6]]
-; SCALABLE-NEXT:    [[TMP13:%.*]] = getelementptr i64, ptr [[TMP12]], i32 0
-; SCALABLE-NEXT:    store <vscale x 2 x i64> [[TMP11]], ptr [[TMP13]], align 8
-; SCALABLE-NEXT:    [[NEXTI]] = add nuw i64 [[I]], [[TMP5]]
-; SCALABLE-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[NEXTI]], [[N_VEC]]
-; SCALABLE-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP14:![0-9]+]]
+; SCALABLE-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0
+; SCALABLE-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1
+; SCALABLE-NEXT:    [[TMP12:%.*]] = add <vscale x 2 x i64> [[TMP10]], [[TMP11]]
+; SCALABLE-NEXT:    [[TMP13:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[TMP6]]
+; SCALABLE-NEXT:    [[TMP14:%.*]] = getelementptr i64, ptr [[TMP13]], i32 0
+; SCALABLE-NEXT:    store <vscale x 2 x i64> [[TMP12]], ptr [[TMP14]], align 8
+; SCALABLE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; SCALABLE-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; SCALABLE-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; SCALABLE:       middle.block:
 ; SCALABLE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
 ; SCALABLE-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; SCALABLE:       scalar.ph:
-; SCALABLE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; SCALABLE-NEXT:    br label [[LOOP1:%.*]]
+; SCALABLE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; SCALABLE-NEXT:    br label [[LOOP:%.*]]
 ; SCALABLE:       loop:
-; SCALABLE-NEXT:    [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; SCALABLE-NEXT:    [[OFFSET2:%.*]] = shl i64 [[I1]], 1
-; SCALABLE-NEXT:    [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
-; SCALABLE-NEXT:    [[X0:%.*]] = load i64, ptr [[Q2]], align 8
-; SCALABLE-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET2]], 1
+; SCALABLE-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; SCALABLE-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; SCALABLE-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; SCALABLE-NEXT:    [[X0:%.*]] = load i64, ptr [[Q0]], align 8
+; SCALABLE-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
 ; SCALABLE-NEXT:    [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
 ; SCALABLE-NEXT:    [[X1:%.*]] = load i64, ptr [[Q1]], align 8
 ; SCALABLE-NEXT:    [[RES:%.*]] = add i64 [[X0]], [[X1]]
-; SCALABLE-NEXT:    [[DST:%.*]] = getelementptr i64, ptr [[Q]], i64 [[I1]]
+; SCALABLE-NEXT:    [[DST:%.*]] = getelementptr i64, ptr [[Q]], i64 [[I]]
 ; SCALABLE-NEXT:    store i64 [[RES]], ptr [[DST]], align 8
-; SCALABLE-NEXT:    [[NEXTI1]] = add i64 [[I1]], 1
-; SCALABLE-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; SCALABLE-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP15:![0-9]+]]
+; SCALABLE-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; SCALABLE-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; SCALABLE-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
 ; SCALABLE:       exit:
 ; SCALABLE-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/sve-interleave-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/sve-interleave-vectorization.ll
deleted file mode 100644
index 362ec22600f92..0000000000000
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/sve-interleave-vectorization.ll
+++ /dev/null
@@ -1,135 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes=loop-vectorize,interleaved-access -mattr=+sve -S -o - %s | FileCheck %s
-
-target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
-target triple = "aarch64"
-
-%struct.xyzt = type { i32, i32, i32, i32 }
-; for (int i = 0; i < 1024; ++i) {
-;   dst[i].x = a[i].x + b[i].x;
-;   dst[i].y = a[i].y - b[i].y;
-;   dst[i].z = a[i].z << b[i].z;
-;   dst[i].t = a[i].t >> b[i].t;
-; }
-
-define void @interleave_deinterleave(ptr noalias %dst, ptr %a, ptr %b) {
-; CHECK-LABEL: @interleave_deinterleave(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK:       vector.ph:
-; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
-; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
-; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
-; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_XYZT:%.*]], ptr [[A:%.*]], i64 [[TMP6]]
-; CHECK-NEXT:    [[LDN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> splat (i1 true), ptr [[TMP7]])
-; CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 0
-; CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 1
-; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 2
-; CHECK-NEXT:    [[TMP12:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 3
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_XYZT]], ptr [[B:%.*]], i64 [[TMP6]]
-; CHECK-NEXT:    [[LDN9:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> splat (i1 true), ptr [[TMP13]])
-; CHECK-NEXT:    [[TMP16:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN9]], 0
-; CHECK-NEXT:    [[TMP17:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN9]], 1
-; CHECK-NEXT:    [[TMP18:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN9]], 2
-; CHECK-NEXT:    [[TMP19:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN9]], 3
-; CHECK-NEXT:    [[TMP20:%.*]] = add nsw <vscale x 4 x i32> [[TMP16]], [[TMP9]]
-; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_XYZT]], ptr [[DST:%.*]], i64 [[TMP6]]
-; CHECK-NEXT:    [[TMP22:%.*]] = sub nsw <vscale x 4 x i32> [[TMP10]], [[TMP17]]
-; CHECK-NEXT:    [[TMP23:%.*]] = shl <vscale x 4 x i32> [[TMP11]], [[TMP18]]
-; CHECK-NEXT:    [[TMP24:%.*]] = ashr <vscale x 4 x i32> [[TMP12]], [[TMP19]]
-; CHECK-NEXT:    call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP20]], <vscale x 4 x i32> [[TMP22]], <vscale x 4 x i32> [[TMP23]], <vscale x 4 x i32> [[TMP24]], <vscale x 4 x i1> splat (i1 true), ptr [[TMP21]])
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK:       middle.block:
-; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
-; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER]]
-; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.body:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_XYZT]], ptr [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_XYZT]], ptr [[B]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP32:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP31]]
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [[STRUCT_XYZT]], ptr [[DST]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX5]], align 4
-; CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 4
-; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[Y]], align 4
-; CHECK-NEXT:    [[Y11:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX2]], i64 4
-; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[Y11]], align 4
-; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP33]], [[TMP26]]
-; CHECK-NEXT:    [[Y14:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX5]], i64 4
-; CHECK-NEXT:    store i32 [[SUB]], ptr [[Y14]], align 4
-; CHECK-NEXT:    [[Z:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 8
-; CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr [[Z]], align 4
-; CHECK-NEXT:    [[Z19:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX2]], i64 8
-; CHECK-NEXT:    [[TMP28:%.*]] = load i32, ptr [[Z19]], align 4
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[TMP27]], [[TMP28]]
-; CHECK-NEXT:    [[Z22:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX5]], i64 8
-; CHECK-NEXT:    store i32 [[SHL]], ptr [[Z22]], align 4
-; CHECK-NEXT:    [[T:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 12
-; CHECK-NEXT:    [[TMP29:%.*]] = load i32, ptr [[T]], align 4
-; CHECK-NEXT:    [[T27:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX2]], i64 12
-; CHECK-NEXT:    [[TMP30:%.*]] = load i32, ptr [[T27]], align 4
-; CHECK-NEXT:    [[SHR:%.*]] = ashr i32 [[TMP29]], [[TMP30]]
-; CHECK-NEXT:    [[T30:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX5]], i64 12
-; CHECK-NEXT:    store i32 [[SHR]], ptr [[T30]], align 4
-; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
-; CHECK:       for.end:
-; CHECK-NEXT:    ret void
-;
-entry:
-  br label %for.body
-
-for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %gep.a = getelementptr inbounds %struct.xyzt, ptr %a, i64 %iv
-  %a.0 = load i32, ptr %gep.a, align 4
-  %gep.b = getelementptr inbounds %struct.xyzt, ptr %b, i64 %iv
-  %b.0 = load i32, ptr %gep.b, align 4
-  %add = add nsw i32 %b.0, %a.0
-  %gep.dst = getelementptr inbounds %struct.xyzt, ptr %dst, i64 %iv
-  store i32 %add, ptr %gep.dst, align 4
-  %gep.a.1 = getelementptr inbounds nuw i8, ptr %gep.a, i64 4
-  %a.1 = load i32, ptr %gep.a.1, align 4
-  %gep.b.1 = getelementptr inbounds nuw i8, ptr %gep.b, i64 4
-  %b.1 = load i32, ptr %gep.b.1, align 4
-  %sub = sub nsw i32 %a.1, %b.1
-  %gep.dst.1 = getelementptr inbounds nuw i8, ptr %gep.dst, i64 4
-  store i32 %sub, ptr %gep.dst.1, align 4
-  %gep.a.2 = getelementptr inbounds nuw i8, ptr %gep.a, i64 8
-  %a.2 = load i32, ptr %gep.a.2, align 4
-  %gep.b.2 = getelementptr inbounds nuw i8, ptr %gep.b, i64 8
-  %b.2 = load i32, ptr %gep.b.2, align 4
-  %shl = shl i32 %a.2, %b.2
-  %gep.dst.2 = getelementptr inbounds nuw i8, ptr %gep.dst, i64 8
-  store i32 %shl, ptr %gep.dst.2, align 4
-  %gep.a.3 = getelementptr inbounds nuw i8, ptr %gep.a, i64 12
-  %a.3 = load i32, ptr %gep.a.3, align 4
-  %gep.b.3 = getelementptr inbounds nuw i8, ptr %gep.b, i64 12
-  %b.3 = load i32, ptr %gep.b.3, align 4
-  %shr = ashr i32 %a.3, %b.3
-  %gep.dst.3 = getelementptr inbounds nuw i8, ptr %gep.dst, i64 12
-  store i32 %shr, ptr %gep.dst.3, align 4
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond.not = icmp eq i64 %iv.next, 1024
-  br i1 %exitcond.not, label %for.end, label %for.body
-
-for.end:
-  ret void
-}
diff --git a/llvm/test/Transforms/Reassociate/reassoc_bool_vec.ll b/llvm/test/Transforms/Reassociate/reassoc_bool_vec.ll
index d4aa5c507ec8b..e20b9c2c219d8 100644
--- a/llvm/test/Transforms/Reassociate/reassoc_bool_vec.ll
+++ b/llvm/test/Transforms/Reassociate/reassoc_bool_vec.ll
@@ -4,13 +4,13 @@
 define <8 x i1> @vector0(<8 x i1> %b0, <8 x i1> %b1, <8 x i1> %b2, <8 x i1> %b3, <8 x i1> %b4, <8 x i1> %b5, <8 x i1> %b6, <8 x i1> %b7) {
 ; CHECK-LABEL: define <8 x i1> @vector0(
 ; CHECK-SAME: <8 x i1> [[B0:%.*]], <8 x i1> [[B1:%.*]], <8 x i1> [[B2:%.*]], <8 x i1> [[B3:%.*]], <8 x i1> [[B4:%.*]], <8 x i1> [[B5:%.*]], <8 x i1> [[B6:%.*]], <8 x i1> [[B7:%.*]]) {
-; CHECK-NEXT:    [[OR67:%.*]] = or <8 x i1> [[B1]], [[B0]]
-; CHECK-NEXT:    [[OR45:%.*]] = or <8 x i1> [[OR67]], [[B2]]
-; CHECK-NEXT:    [[OR4567:%.*]] = or <8 x i1> [[OR45]], [[B3]]
-; CHECK-NEXT:    [[OR23:%.*]] = or <8 x i1> [[OR4567]], [[B4]]
-; CHECK-NEXT:    [[OR01:%.*]] = or <8 x i1> [[OR23]], [[B5]]
-; CHECK-NEXT:    [[OR0123:%.*]] = or <8 x i1> [[OR01]], [[B6]]
-; CHECK-NEXT:    [[OR01234567:%.*]] = or <8 x i1> [[OR0123]], [[B7]]
+; CHECK-NEXT:    [[OR01:%.*]] = or <8 x i1> [[B0]], [[B1]]
+; CHECK-NEXT:    [[OR23:%.*]] = or <8 x i1> [[B2]], [[B3]]
+; CHECK-NEXT:    [[OR45:%.*]] = or <8 x i1> [[B4]], [[B5]]
+; CHECK-NEXT:    [[OR67:%.*]] = or <8 x i1> [[B6]], [[B7]]
+; CHECK-NEXT:    [[OR0123:%.*]] = or <8 x i1> [[OR01]], [[OR23]]
+; CHECK-NEXT:    [[OR4567:%.*]] = or <8 x i1> [[OR45]], [[OR67]]
+; CHECK-NEXT:    [[OR01234567:%.*]] = or <8 x i1> [[OR0123]], [[OR4567]]
 ; CHECK-NEXT:    ret <8 x i1> [[OR01234567]]
 ;
   %or01 = or <8 x i1> %b0, %b1
@@ -26,13 +26,13 @@ define <8 x i1> @vector0(<8 x i1> %b0, <8 x i1> %b1, <8 x i1> %b2, <8 x i1> %b3,
 define <8 x i1> @vector1(<8 x i1> %b0, <8 x i1> %b1, <8 x i1> %b2, <8 x i1> %b3, <8 x i1> %b4, <8 x i1> %b5, <8 x i1> %b6, <8 x i1> %b7) {
 ; CHECK-LABEL: define <8 x i1> @vector1(
 ; CHECK-SAME: <8 x i1> [[B0:%.*]], <8 x i1> [[B1:%.*]], <8 x i1> [[B2:%.*]], <8 x i1> [[B3:%.*]], <8 x i1> [[B4:%.*]], <8 x i1> [[B5:%.*]], <8 x i1> [[B6:%.*]], <8 x i1> [[B7:%.*]]) {
-; CHECK-NEXT:    [[OR67:%.*]] = and <8 x i1> [[B1]], [[B0]]
-; CHECK-NEXT:    [[OR45:%.*]] = and <8 x i1> [[OR67]], [[B2]]
-; CHECK-NEXT:    [[OR4567:%.*]] = and <8 x i1> [[OR45]], [[B3]]
-; CHECK-NEXT:    [[OR23:%.*]] = and <8 x i1> [[OR4567]], [[B4]]
-; CHECK-NEXT:    [[OR01:%.*]] = and <8 x i1> [[OR23]], [[B5]]
-; CHECK-NEXT:    [[OR0123:%.*]] = and <8 x i1> [[OR01]], [[B6]]
-; CHECK-NEXT:    [[OR01234567:%.*]] = and <8 x i1> [[OR0123]], [[B7]]
+; CHECK-NEXT:    [[OR01:%.*]] = and <8 x i1> [[B0]], [[B1]]
+; CHECK-NEXT:    [[OR23:%.*]] = and <8 x i1> [[B2]], [[B3]]
+; CHECK-NEXT:    [[OR45:%.*]] = and <8 x i1> [[B4]], [[B5]]
+; CHECK-NEXT:    [[OR67:%.*]] = and <8 x i1> [[B6]], [[B7]]
+; CHECK-NEXT:    [[OR0123:%.*]] = and <8 x i1> [[OR01]], [[OR23]]
+; CHECK-NEXT:    [[OR4567:%.*]] = and <8 x i1> [[OR45]], [[OR67]]
+; CHECK-NEXT:    [[OR01234567:%.*]] = and <8 x i1> [[OR0123]], [[OR4567]]
 ; CHECK-NEXT:    ret <8 x i1> [[OR01234567]]
 ;
   %or01 = and <8 x i1> %b0, %b1
@@ -48,27 +48,28 @@ define <8 x i1> @vector1(<8 x i1> %b0, <8 x i1> %b1, <8 x i1> %b2, <8 x i1> %b3,
 define <8 x i1> @vector2(<8 x i1> %a, <8 x i1> %b0, <8 x i1> %b1, <8 x i1> %b2, <8 x i1> %b3, <8 x i1> %b4, <8 x i1> %b5, <8 x i1> %b6, <8 x i1> %b7) {
 ; CHECK-LABEL: define <8 x i1> @vector2(
 ; CHECK-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B0:%.*]], <8 x i1> [[B1:%.*]], <8 x i1> [[B2:%.*]], <8 x i1> [[B3:%.*]], <8 x i1> [[B4:%.*]], <8 x i1> [[B5:%.*]], <8 x i1> [[B6:%.*]], <8 x i1> [[B7:%.*]]) {
-; CHECK-NEXT:    [[OR0:%.*]] = or <8 x i1> [[B0]], [[A]]
-; CHECK-NEXT:    [[OR1:%.*]] = or <8 x i1> [[B1]], [[A]]
-; CHECK-NEXT:    [[OR2:%.*]] = or <8 x i1> [[B2]], [[A]]
-; CHECK-NEXT:    [[OR3:%.*]] = or <8 x i1> [[B3]], [[A]]
-; CHECK-NEXT:    [[OR4:%.*]] = or <8 x i1> [[B4]], [[A]]
-; CHECK-NEXT:    [[OR5:%.*]] = or <8 x i1> [[B5]], [[A]]
-; CHECK-NEXT:    [[OR6:%.*]] = or <8 x i1> [[B6]], [[A]]
-; CHECK-NEXT:    [[OR7:%.*]] = or <8 x i1> [[B7]], [[A]]
-; CHECK-NEXT:    [[XOR2:%.*]] = xor <8 x i1> [[OR1]], [[OR0]]
-; CHECK-NEXT:    [[OR045:%.*]] = xor <8 x i1> [[XOR2]], [[OR2]]
-; CHECK-NEXT:    [[XOR3:%.*]] = xor <8 x i1> [[OR045]], [[OR3]]
-; CHECK-NEXT:    [[XOR4:%.*]] = xor <8 x i1> [[XOR3]], [[OR4]]
-; CHECK-NEXT:    [[XOR5:%.*]] = xor <8 x i1> [[XOR4]], [[OR5]]
-; CHECK-NEXT:    [[XOR6:%.*]] = xor <8 x i1> [[XOR5]], [[OR6]]
+; CHECK-NEXT:    [[OR0:%.*]] = or <8 x i1> [[A]], [[B0]]
+; CHECK-NEXT:    [[OR1:%.*]] = or <8 x i1> [[A]], [[B1]]
+; CHECK-NEXT:    [[OR2:%.*]] = or <8 x i1> [[A]], [[B2]]
+; CHECK-NEXT:    [[OR3:%.*]] = or <8 x i1> [[A]], [[B3]]
+; CHECK-NEXT:    [[OR7:%.*]] = or <8 x i1> [[A]], [[B4]]
+; CHECK-NEXT:    [[OR5:%.*]] = or <8 x i1> [[A]], [[B5]]
+; CHECK-NEXT:    [[OR6:%.*]] = or <8 x i1> [[A]], [[B6]]
+; CHECK-NEXT:    [[OR8:%.*]] = or <8 x i1> [[A]], [[B7]]
+; CHECK-NEXT:    [[OR045:%.*]] = xor <8 x i1> [[OR0]], [[OR1]]
+; CHECK-NEXT:    [[XOR2:%.*]] = xor <8 x i1> [[OR045]], [[OR2]]
+; CHECK-NEXT:    [[XOR6:%.*]] = xor <8 x i1> [[XOR2]], [[OR3]]
 ; CHECK-NEXT:    [[XOR7:%.*]] = xor <8 x i1> [[XOR6]], [[OR7]]
+; CHECK-NEXT:    [[OR023:%.*]] = xor <8 x i1> [[XOR7]], [[OR5]]
+; CHECK-NEXT:    [[XOR4:%.*]] = xor <8 x i1> [[OR023]], [[OR6]]
+; CHECK-NEXT:    [[XOR8:%.*]] = xor <8 x i1> [[XOR4]], [[OR8]]
 ; CHECK-NEXT:    [[OR4560:%.*]] = or <8 x i1> [[OR045]], [[XOR2]]
-; CHECK-NEXT:    [[OR023:%.*]] = or <8 x i1> [[OR4560]], [[XOR3]]
+; CHECK-NEXT:    [[OR23:%.*]] = or <8 x i1> [[XOR6]], [[XOR7]]
 ; CHECK-NEXT:    [[OR001:%.*]] = or <8 x i1> [[OR023]], [[XOR4]]
+; CHECK-NEXT:    [[XOR5:%.*]] = or <8 x i1> [[OR045]], [[XOR8]]
+; CHECK-NEXT:    [[OR123:%.*]] = or <8 x i1> [[OR4560]], [[OR23]]
 ; CHECK-NEXT:    [[OR0123:%.*]] = or <8 x i1> [[OR001]], [[XOR5]]
-; CHECK-NEXT:    [[OR01234567:%.*]] = or <8 x i1> [[OR0123]], [[XOR6]]
-; CHECK-NEXT:    [[OR1234567:%.*]] = or <8 x i1> [[OR01234567]], [[XOR7]]
+; CHECK-NEXT:    [[OR1234567:%.*]] = or <8 x i1> [[OR123]], [[OR0123]]
 ; CHECK-NEXT:    ret <8 x i1> [[OR1234567]]
 ;
   %or0 = or <8 x i1> %b0, %a
@@ -99,20 +100,20 @@ define <8 x i1> @vector2(<8 x i1> %a, <8 x i1> %b0, <8 x i1> %b1, <8 x i1> %b2,
 define <8 x i1> @vector3(<8 x i1> %a, <8 x i1> %b0, <8 x i1> %b1, <8 x i1> %b2, <8 x i1> %b3, <8 x i1> %b4, <8 x i1> %b5, <8 x i1> %b6, <8 x i1> %b7) {
 ; CHECK-LABEL: define <8 x i1> @vector3(
 ; CHECK-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B0:%.*]], <8 x i1> [[B1:%.*]], <8 x i1> [[B2:%.*]], <8 x i1> [[B3:%.*]], <8 x i1> [[B4:%.*]], <8 x i1> [[B5:%.*]], <8 x i1> [[B6:%.*]], <8 x i1> [[B7:%.*]]) {
-; CHECK-NEXT:    [[OR0:%.*]] = or <8 x i1> [[B0]], [[A]]
-; CHECK-NEXT:    [[OR1:%.*]] = or <8 x i1> [[B1]], [[A]]
-; CHECK-NEXT:    [[OR2:%.*]] = or <8 x i1> [[B2]], [[A]]
-; CHECK-NEXT:    [[OR3:%.*]] = or <8 x i1> [[B3]], [[A]]
-; CHECK-NEXT:    [[OR4:%.*]] = or <8 x i1> [[B4]], [[A]]
-; CHECK-NEXT:    [[OR5:%.*]] = or <8 x i1> [[B5]], [[A]]
-; CHECK-NEXT:    [[OR6:%.*]] = or <8 x i1> [[B6]], [[A]]
-; CHECK-NEXT:    [[OR7:%.*]] = or <8 x i1> [[B7]], [[A]]
+; CHECK-NEXT:    [[OR1:%.*]] = or <8 x i1> [[A]], [[B0]]
+; CHECK-NEXT:    [[OR0:%.*]] = or <8 x i1> [[A]], [[B1]]
+; CHECK-NEXT:    [[OR2:%.*]] = or <8 x i1> [[A]], [[B2]]
+; CHECK-NEXT:    [[OR4:%.*]] = or <8 x i1> [[A]], [[B3]]
+; CHECK-NEXT:    [[XOR2:%.*]] = or <8 x i1> [[A]], [[B4]]
+; CHECK-NEXT:    [[OR3:%.*]] = or <8 x i1> [[A]], [[B5]]
+; CHECK-NEXT:    [[XOR0:%.*]] = or <8 x i1> [[A]], [[B6]]
+; CHECK-NEXT:    [[OR5:%.*]] = or <8 x i1> [[A]], [[B7]]
 ; CHECK-NEXT:    [[XOR3:%.*]] = xor <8 x i1> [[OR1]], [[OR0]]
-; CHECK-NEXT:    [[XOR2:%.*]] = xor <8 x i1> [[XOR3]], [[OR2]]
+; CHECK-NEXT:    [[XOR1:%.*]] = xor <8 x i1> [[OR2]], [[OR4]]
 ; CHECK-NEXT:    [[XOR7:%.*]] = xor <8 x i1> [[XOR2]], [[OR3]]
-; CHECK-NEXT:    [[XOR0:%.*]] = xor <8 x i1> [[XOR7]], [[OR4]]
 ; CHECK-NEXT:    [[XOR4:%.*]] = xor <8 x i1> [[XOR0]], [[OR5]]
-; CHECK-NEXT:    [[XOR5:%.*]] = xor <8 x i1> [[XOR4]], [[OR6]]
+; CHECK-NEXT:    [[XOR5:%.*]] = xor <8 x i1> [[XOR3]], [[XOR1]]
+; CHECK-NEXT:    [[OR7:%.*]] = xor <8 x i1> [[XOR7]], [[XOR4]]
 ; CHECK-NEXT:    [[OR4560:%.*]] = xor <8 x i1> [[XOR5]], [[OR7]]
 ; CHECK-NEXT:    ret <8 x i1> [[OR4560]]
 ;
@@ -137,20 +138,20 @@ define <8 x i1> @vector3(<8 x i1> %a, <8 x i1> %b0, <8 x i1> %b1, <8 x i1> %b2,
 define <8 x i1> @vector4(<8 x i1> %a, <8 x i1> %b0, <8 x i1> %b1, <8 x i1> %b2, <8 x i1> %b3, <8 x i1> %b4, <8 x i1> %b5, <8 x i1> %b6, <8 x i1> %b7) {
 ; CHECK-LABEL: define <8 x i1> @vector4(
 ; CHECK-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B0:%.*]], <8 x i1> [[B1:%.*]], <8 x i1> [[B2:%.*]], <8 x i1> [[B3:%.*]], <8 x i1> [[B4:%.*]], <8 x i1> [[B5:%.*]], <8 x i1> [[B6:%.*]], <8 x i1> [[B7:%.*]]) {
-; CHECK-NEXT:    [[XOR0:%.*]] = xor <8 x i1> [[B0]], [[A]]
-; CHECK-NEXT:    [[XOR1:%.*]] = xor <8 x i1> [[B1]], [[A]]
-; CHECK-NEXT:    [[XOR2:%.*]] = xor <8 x i1> [[B2]], [[A]]
-; CHECK-NEXT:    [[XOR3:%.*]] = xor <8 x i1> [[B3]], [[A]]
-; CHECK-NEXT:    [[XOR4:%.*]] = xor <8 x i1> [[B4]], [[A]]
-; CHECK-NEXT:    [[XOR5:%.*]] = xor <8 x i1> [[B5]], [[A]]
-; CHECK-NEXT:    [[XOR6:%.*]] = xor <8 x i1> [[B6]], [[A]]
-; CHECK-NEXT:    [[XOR7:%.*]] = xor <8 x i1> [[B7]], [[A]]
+; CHECK-NEXT:    [[XOR1:%.*]] = xor <8 x i1> [[A]], [[B0]]
+; CHECK-NEXT:    [[XOR0:%.*]] = xor <8 x i1> [[A]], [[B1]]
+; CHECK-NEXT:    [[XOR2:%.*]] = xor <8 x i1> [[A]], [[B2]]
+; CHECK-NEXT:    [[XOR4:%.*]] = xor <8 x i1> [[A]], [[B3]]
+; CHECK-NEXT:    [[AND2:%.*]] = xor <8 x i1> [[A]], [[B4]]
+; CHECK-NEXT:    [[XOR3:%.*]] = xor <8 x i1> [[A]], [[B5]]
+; CHECK-NEXT:    [[AND1:%.*]] = xor <8 x i1> [[A]], [[B6]]
+; CHECK-NEXT:    [[XOR5:%.*]] = xor <8 x i1> [[A]], [[B7]]
 ; CHECK-NEXT:    [[AND3:%.*]] = and <8 x i1> [[XOR1]], [[XOR0]]
-; CHECK-NEXT:    [[AND2:%.*]] = and <8 x i1> [[AND3]], [[XOR2]]
+; CHECK-NEXT:    [[AND4:%.*]] = and <8 x i1> [[XOR2]], [[XOR4]]
 ; CHECK-NEXT:    [[OR23:%.*]] = and <8 x i1> [[AND2]], [[XOR3]]
-; CHECK-NEXT:    [[AND1:%.*]] = and <8 x i1> [[OR23]], [[XOR4]]
 ; CHECK-NEXT:    [[AND0:%.*]] = and <8 x i1> [[AND1]], [[XOR5]]
-; CHECK-NEXT:    [[OR01:%.*]] = and <8 x i1> [[AND0]], [[XOR6]]
+; CHECK-NEXT:    [[OR01:%.*]] = and <8 x i1> [[AND3]], [[AND4]]
+; CHECK-NEXT:    [[XOR7:%.*]] = and <8 x i1> [[OR23]], [[AND0]]
 ; CHECK-NEXT:    [[OR0123:%.*]] = and <8 x i1> [[OR01]], [[XOR7]]
 ; CHECK-NEXT:    ret <8 x i1> [[OR0123]]
 ;
@@ -175,29 +176,29 @@ define <8 x i1> @vector4(<8 x i1> %a, <8 x i1> %b0, <8 x i1> %b1, <8 x i1> %b2,
 define <8 x i1> @vector5(<8 x i1> %a, <8 x i1> %b0, <8 x i1> %b1, <8 x i1> %b2, <8 x i1> %b3, <8 x i1> %b4, <8 x i1> %b5, <8 x i1> %b6, <8 x i1> %b7) {
 ; CHECK-LABEL: define <8 x i1> @vector5(
 ; CHECK-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B0:%.*]], <8 x i1> [[B1:%.*]], <8 x i1> [[B2:%.*]], <8 x i1> [[B3:%.*]], <8 x i1> [[B4:%.*]], <8 x i1> [[B5:%.*]], <8 x i1> [[B6:%.*]], <8 x i1> [[B7:%.*]]) {
-; CHECK-NEXT:    [[XOR0:%.*]] = xor <8 x i1> [[B0]], [[A]]
-; CHECK-NEXT:    [[XOR1:%.*]] = xor <8 x i1> [[B1]], [[A]]
-; CHECK-NEXT:    [[XOR2:%.*]] = xor <8 x i1> [[B2]], [[A]]
-; CHECK-NEXT:    [[XOR3:%.*]] = xor <8 x i1> [[B3]], [[A]]
-; CHECK-NEXT:    [[XOR4:%.*]] = xor <8 x i1> [[B4]], [[A]]
-; CHECK-NEXT:    [[XOR5:%.*]] = xor <8 x i1> [[B5]], [[A]]
-; CHECK-NEXT:    [[XOR6:%.*]] = xor <8 x i1> [[B6]], [[A]]
-; CHECK-NEXT:    [[XOR7:%.*]] = xor <8 x i1> [[B7]], [[A]]
-; CHECK-NEXT:    [[OR3:%.*]] = or <8 x i1> [[B1]], [[B0]]
+; CHECK-NEXT:    [[OR3:%.*]] = xor <8 x i1> [[A]], [[B0]]
+; CHECK-NEXT:    [[XOR0:%.*]] = xor <8 x i1> [[A]], [[B1]]
+; CHECK-NEXT:    [[OR23:%.*]] = xor <8 x i1> [[A]], [[B2]]
+; CHECK-NEXT:    [[XOR1:%.*]] = xor <8 x i1> [[A]], [[B3]]
+; CHECK-NEXT:    [[OR0:%.*]] = xor <8 x i1> [[A]], [[B4]]
+; CHECK-NEXT:    [[XOR2:%.*]] = xor <8 x i1> [[A]], [[B5]]
+; CHECK-NEXT:    [[OR0123:%.*]] = xor <8 x i1> [[A]], [[B6]]
+; CHECK-NEXT:    [[XOR3:%.*]] = xor <8 x i1> [[A]], [[B7]]
 ; CHECK-NEXT:    [[OR2:%.*]] = or <8 x i1> [[OR3]], [[XOR0]]
-; CHECK-NEXT:    [[OR23:%.*]] = or <8 x i1> [[OR2]], [[B2]]
 ; CHECK-NEXT:    [[OR1:%.*]] = or <8 x i1> [[OR23]], [[XOR1]]
-; CHECK-NEXT:    [[OR0:%.*]] = or <8 x i1> [[OR1]], [[B3]]
 ; CHECK-NEXT:    [[OR01:%.*]] = or <8 x i1> [[OR0]], [[XOR2]]
-; CHECK-NEXT:    [[OR0123:%.*]] = or <8 x i1> [[OR01]], [[B4]]
 ; CHECK-NEXT:    [[OR7:%.*]] = or <8 x i1> [[OR0123]], [[XOR3]]
-; CHECK-NEXT:    [[OR6:%.*]] = or <8 x i1> [[OR7]], [[B5]]
-; CHECK-NEXT:    [[OR67:%.*]] = or <8 x i1> [[OR6]], [[XOR4]]
-; CHECK-NEXT:    [[OR5:%.*]] = or <8 x i1> [[OR67]], [[B6]]
-; CHECK-NEXT:    [[OR4:%.*]] = or <8 x i1> [[OR5]], [[XOR5]]
-; CHECK-NEXT:    [[OR45:%.*]] = or <8 x i1> [[OR4]], [[B7]]
+; CHECK-NEXT:    [[OR45:%.*]] = or <8 x i1> [[B0]], [[B1]]
+; CHECK-NEXT:    [[XOR6:%.*]] = or <8 x i1> [[B2]], [[B3]]
+; CHECK-NEXT:    [[OR6:%.*]] = or <8 x i1> [[B4]], [[B5]]
+; CHECK-NEXT:    [[OR8:%.*]] = or <8 x i1> [[B6]], [[B7]]
+; CHECK-NEXT:    [[OR4:%.*]] = or <8 x i1> [[OR2]], [[OR1]]
+; CHECK-NEXT:    [[OR24:%.*]] = or <8 x i1> [[OR01]], [[OR7]]
 ; CHECK-NEXT:    [[OR4567:%.*]] = or <8 x i1> [[OR45]], [[XOR6]]
-; CHECK-NEXT:    [[OR01234567:%.*]] = or <8 x i1> [[OR4567]], [[XOR7]]
+; CHECK-NEXT:    [[OR67:%.*]] = or <8 x i1> [[OR6]], [[OR8]]
+; CHECK-NEXT:    [[OR123:%.*]] = or <8 x i1> [[OR4]], [[OR24]]
+; CHECK-NEXT:    [[OR4568:%.*]] = or <8 x i1> [[OR4567]], [[OR67]]
+; CHECK-NEXT:    [[OR01234567:%.*]] = or <8 x i1> [[OR4568]], [[OR123]]
 ; CHECK-NEXT:    ret <8 x i1> [[OR01234567]]
 ;
   %xor0 = xor <8 x i1> %b0, %a
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll
index 57c877a58d5c0..29bd81998cdb2 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll
@@ -552,9 +552,62 @@ define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const_pow2(<4 x i32> %a)
   ret <4 x i32> %r3
 }
 
+define <2 x i32> @sdiv_v2i32_unknown_divisor(<2 x i32> %a, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z)
+; NO-SVE-LABEL: define <2 x i32> @sdiv_v2i32_unknown_divisor(
+; NO-SVE-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; NO-SVE-NEXT:    [[A0:%.*]] = extractelement <2 x i32> [[A]], i64 0
+; NO-SVE-NEXT:    [[A1:%.*]] = extractelement <2 x i32> [[A]], i64 1
+; NO-SVE-NEXT:    [[X0:%.*]] = extractelement <2 x i32> [[X]], i64 0
+; NO-SVE-NEXT:    [[X1:%.*]] = extractelement <2 x i32> [[X]], i64 1
+; NO-SVE-NEXT:    [[TMP1:%.*]] = sdiv i32 [[A0]], [[X0]]
+; NO-SVE-NEXT:    [[TMP2:%.*]] = sdiv i32 [[A1]], [[X1]]
+; NO-SVE-NEXT:    [[TMP3:%.*]] = add i32 [[TMP1]], [[X0]]
+; NO-SVE-NEXT:    [[TMP4:%.*]] = add i32 [[TMP2]], [[X1]]
+; NO-SVE-NEXT:    [[Y0:%.*]] = extractelement <2 x i32> [[Y]], i64 0
+; NO-SVE-NEXT:    [[Y1:%.*]] = extractelement <2 x i32> [[Y]], i64 1
+; NO-SVE-NEXT:    [[TMP5:%.*]] = sub i32 [[TMP3]], [[Y0]]
+; NO-SVE-NEXT:    [[TMP6:%.*]] = sub i32 [[TMP4]], [[Y1]]
+; NO-SVE-NEXT:    [[Z0:%.*]] = extractelement <2 x i32> [[Z]], i64 0
+; NO-SVE-NEXT:    [[Z1:%.*]] = extractelement <2 x i32> [[Z]], i64 1
+; NO-SVE-NEXT:    [[TMP7:%.*]] = mul i32 [[TMP5]], [[Z0]]
+; NO-SVE-NEXT:    [[TMP8:%.*]] = mul i32 [[TMP6]], [[Z1]]
+; NO-SVE-NEXT:    [[RES0:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
+; NO-SVE-NEXT:    [[RES1:%.*]] = insertelement <2 x i32> [[RES0]], i32 [[TMP8]], i32 1
+; NO-SVE-NEXT:    ret <2 x i32> [[RES1]]
+;
+; SVE-LABEL: define <2 x i32> @sdiv_v2i32_unknown_divisor(
+; SVE-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; SVE-NEXT:    [[TMP2:%.*]] = sdiv <2 x i32> [[A]], [[X]]
+; SVE-NEXT:    [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[X]]
+; SVE-NEXT:    [[TMP4:%.*]] = sub <2 x i32> [[TMP3]], [[Y]]
+; SVE-NEXT:    [[TMP5:%.*]] = mul <2 x i32> [[TMP4]], [[Z]]
+; SVE-NEXT:    ret <2 x i32> [[TMP5]]
+;
+{
+  %a0 = extractelement <2 x i32> %a, i64 0
+  %a1 = extractelement <2 x i32> %a, i64 1
+  %x0 = extractelement <2 x i32> %x, i64 0
+  %x1 = extractelement <2 x i32> %x, i64 1
+  %1 = sdiv i32 %a0, %x0
+  %2 = sdiv i32 %a1, %x1
+  %3 = add i32 %1, %x0
+  %4 = add i32 %2, %x1
+  %y0 = extractelement <2 x i32> %y, i64 0
+  %y1 = extractelement <2 x i32> %y, i64 1
+  %5 = sub i32 %3, %y0
+  %6 = sub i32 %4, %y1
+  %z0 = extractelement <2 x i32> %z, i64 0
+  %z1 = extractelement <2 x i32> %z, i64 1
+  %7 = mul i32 %5, %z0
+  %8 = mul i32 %6, %z1
+  %res0 = insertelement <2 x i32> poison, i32 %7, i32 0
+  %res1 = insertelement <2 x i32> %res0, i32 %8, i32 1
+  ret <2 x i32> %res1
+}
+
 ; computes (a/const + x - y) * z
-define <2 x i32> @vectorize_sdiv_v2i32(<2 x i32> %a, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z)
-; NO-SVE-LABEL: define <2 x i32> @vectorize_sdiv_v2i32(
+define <2 x i32> @sdiv_v2i32_const_divisor(<2 x i32> %a, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z)
+; NO-SVE-LABEL: define <2 x i32> @sdiv_v2i32_const_divisor(
 ; NO-SVE-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
 ; NO-SVE-NEXT:    [[A0:%.*]] = extractelement <2 x i32> [[A]], i64 0
 ; NO-SVE-NEXT:    [[A1:%.*]] = extractelement <2 x i32> [[A]], i64 1
@@ -576,7 +629,7 @@ define <2 x i32> @vectorize_sdiv_v2i32(<2 x i32> %a, <2 x i32> %x, <2 x i32> %y,
 ; NO-SVE-NEXT:    [[RES1:%.*]] = insertelement <2 x i32> [[RES0]], i32 [[TMP8]], i32 1
 ; NO-SVE-NEXT:    ret <2 x i32> [[RES1]]
 ;
-; SVE-LABEL: define <2 x i32> @vectorize_sdiv_v2i32(
+; SVE-LABEL: define <2 x i32> @sdiv_v2i32_const_divisor(
 ; SVE-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
 ; SVE-NEXT:    [[TMP1:%.*]] = sdiv <2 x i32> [[A]], <i32 2, i32 4>
 ; SVE-NEXT:    [[TMP2:%.*]] = add <2 x i32> [[TMP1]], [[X]]
@@ -605,3 +658,57 @@ define <2 x i32> @vectorize_sdiv_v2i32(<2 x i32> %a, <2 x i32> %x, <2 x i32> %y,
   %res1 = insertelement <2 x i32> %res0, i32 %8, i32 1
   ret <2 x i32> %res1
 }
+
+define <2 x i32> @sdiv_v2i32_Op1_unknown_Op2_const(<2 x i32> %a, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z)
+; NO-SVE-LABEL: define <2 x i32> @sdiv_v2i32_Op1_unknown_Op2_const(
+; NO-SVE-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; NO-SVE-NEXT:    [[A0:%.*]] = extractelement <2 x i32> [[A]], i64 0
+; NO-SVE-NEXT:    [[A1:%.*]] = extractelement <2 x i32> [[A]], i64 1
+; NO-SVE-NEXT:    [[TMP1:%.*]] = sdiv i32 [[A0]], [[A0]]
+; NO-SVE-NEXT:    [[TMP2:%.*]] = sdiv i32 [[A1]], 4
+; NO-SVE-NEXT:    [[X0:%.*]] = extractelement <2 x i32> [[X]], i64 0
+; NO-SVE-NEXT:    [[X1:%.*]] = extractelement <2 x i32> [[X]], i64 1
+; NO-SVE-NEXT:    [[TMP3:%.*]] = add i32 [[TMP1]], [[X0]]
+; NO-SVE-NEXT:    [[TMP4:%.*]] = add i32 [[TMP2]], [[X1]]
+; NO-SVE-NEXT:    [[Y0:%.*]] = extractelement <2 x i32> [[Y]], i64 0
+; NO-SVE-NEXT:    [[Y1:%.*]] = extractelement <2 x i32> [[Y]], i64 1
+; NO-SVE-NEXT:    [[TMP5:%.*]] = sub i32 [[TMP3]], [[Y0]]
+; NO-SVE-NEXT:    [[TMP6:%.*]] = sub i32 [[TMP4]], [[Y1]]
+; NO-SVE-NEXT:    [[Z0:%.*]] = extractelement <2 x i32> [[Z]], i64 0
+; NO-SVE-NEXT:    [[Z1:%.*]] = extractelement <2 x i32> [[Z]], i64 1
+; NO-SVE-NEXT:    [[TMP7:%.*]] = mul i32 [[TMP5]], [[Z0]]
+; NO-SVE-NEXT:    [[TMP8:%.*]] = mul i32 [[TMP6]], [[Z1]]
+; NO-SVE-NEXT:    [[RES0:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
+; NO-SVE-NEXT:    [[RES1:%.*]] = insertelement <2 x i32> [[RES0]], i32 [[TMP8]], i32 1
+; NO-SVE-NEXT:    ret <2 x i32> [[RES1]]
+;
+; SVE-LABEL: define <2 x i32> @sdiv_v2i32_Op1_unknown_Op2_const(
+; SVE-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; SVE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> <i32 poison, i32 4>, <2 x i32> <i32 0, i32 3>
+; SVE-NEXT:    [[TMP2:%.*]] = sdiv <2 x i32> [[A]], [[TMP1]]
+; SVE-NEXT:    [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[X]]
+; SVE-NEXT:    [[TMP4:%.*]] = sub <2 x i32> [[TMP3]], [[Y]]
+; SVE-NEXT:    [[TMP5:%.*]] = mul <2 x i32> [[TMP4]], [[Z]]
+; SVE-NEXT:    ret <2 x i32> [[TMP5]]
+;
+{
+  %a0 = extractelement <2 x i32> %a, i64 0
+  %a1 = extractelement <2 x i32> %a, i64 1
+  %1 = sdiv i32 %a0, %a0
+  %2 = sdiv i32 %a1, 4
+  %x0 = extractelement <2 x i32> %x, i64 0
+  %x1 = extractelement <2 x i32> %x, i64 1
+  %3 = add i32 %1, %x0
+  %4 = add i32 %2, %x1
+  %y0 = extractelement <2 x i32> %y, i64 0
+  %y1 = extractelement <2 x i32> %y, i64 1
+  %5 = sub i32 %3, %y0
+  %6 = sub i32 %4, %y1
+  %z0 = extractelement <2 x i32> %z, i64 0
+  %z1 = extractelement <2 x i32> %z, i64 1
+  %7 = mul i32 %5, %z0
+  %8 = mul i32 %6, %z1
+  %res0 = insertelement <2 x i32> poison, i32 %7, i32 0
+  %res1 = insertelement <2 x i32> %res0, i32 %8, i32 1
+  ret <2 x i32> %res1
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll b/llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll
index dadd22217a3e6..02327272f3ab1 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll
@@ -7,18 +7,14 @@ define i32 @test() {
 ; CHECK-NEXT:    br label %[[FUNC_135_EXIT_I:.*]]
 ; CHECK:       [[FUNC_135_EXIT_I]]:
 ; CHECK-NEXT:    [[G_228_PROMOTED166_I1105_I:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[G_228_PROMOTED166_I1105_I]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 poison>
-; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> [[TMP1]], <i32 0, i32 0, i32 0, i32 poison>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 2, i32 2, i32 2, i32 poison>
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <16 x i32> [[TMP6]], <16 x i32> [[TMP5]], <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 poison, i32 28, i32 29, i32 30, i32 poison>
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 poison, i32 poison>, i32 [[G_228_PROMOTED166_I1105_I]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> zeroinitializer, [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[G_228_PROMOTED166_I1105_I]], i32 7
-; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 0, i32 15
-; CHECK-NEXT:    [[TMP10:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> poison, <8 x i32> [[TMP3]], i64 0)
-; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> [[TMP9]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 23, i32 24, i32 25, i32 26, i32 7, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v12i32(<16 x i32> poison, <12 x i32> [[TMP3]], i64 0)
+; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <16 x i32> [[TMP6]], <16 x i32> [[TMP8]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 23, i32 24, i32 25, i32 26, i32 7, i32 8, i32 9, i32 10, i32 11>
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp ugt <16 x i32> [[TMP11]], zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = icmp ult <16 x i32> [[TMP11]], zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <16 x i1> [[TMP12]], <16 x i1> [[TMP13]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-loads-non-power-of-2.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-loads-non-power-of-2.ll
index be0ed2c34a365..60b0f758133fb 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gather-loads-non-power-of-2.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-loads-non-power-of-2.ll
@@ -5,12 +5,7 @@ define <6 x double> @test(ptr %a) {
 ; CHECK-LABEL: define <6 x double> @test(
 ; CHECK-SAME: ptr [[A:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x double>, ptr [[A]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr double, ptr [[A]], i16 4
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[TMP1]], align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <6 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <6 x double> [[TMP3]], <6 x double> [[TMP4]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP5:%.*]] = load <6 x double>, ptr [[A]], align 8
 ; CHECK-NEXT:    ret <6 x double> [[TMP5]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/long-full-reg-stores.ll b/llvm/test/Transforms/SLPVectorizer/X86/long-full-reg-stores.ll
index aff66dd7c10ea..9fc2b7d6e7865 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/long-full-reg-stores.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/long-full-reg-stores.ll
@@ -9,10 +9,10 @@ define void @test(ptr noalias %0, ptr noalias %1) {
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8
 ; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x double>, ptr [[TMP9]], align 16
 ; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x double>, ptr [[TMP11]], align 8
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <6 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP10]], <6 x i32> <i32 2, i32 4, i32 0, i32 3, i32 5, i32 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <6 x i32> <i32 2, i32 4, i32 0, i32 3, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x double> [[TMP8]], <4 x double> [[TMP7]], <6 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 5>
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <6 x double> [[TMP12]], <6 x double> [[TMP10]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 10, i32 11>
 ; CHECK-NEXT:    store <6 x double> [[TMP13]], ptr [[TMP5]], align 8
 ; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[TMP0]], i64 40
 ; CHECK-NEXT:    [[TMP22:%.*]] = load double, ptr [[TMP21]], align 8
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll
index 6000434ac1e95..598ff9a5178c1 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll
@@ -10,9 +10,9 @@ define void @test() #0 {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    br label [[BB1:%.*]]
 ; CHECK:       bb1:
-; CHECK-NEXT:    [[TMP:%.*]] = phi i32 [ undef, [[BB1]] ], [ undef, [[BB:%.*]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[TMP18:%.*]], [[BB1]] ], [ undef, [[BB]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = mul i32 undef, [[TMP]]
+; CHECK-NEXT:    [[TMP:%.*]] = phi i32 [ 1, [[BB1]] ], [ 2, [[BB:%.*]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[TMP18:%.*]], [[BB1]] ], [ 3, [[BB]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i32 4, [[TMP]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul i32 [[TMP3]], [[TMP]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul i32 [[TMP4]], [[TMP]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = mul i32 [[TMP5]], [[TMP]]
@@ -34,9 +34,9 @@ bb:
   br label %bb1
 
 bb1:                                              ; preds = %bb1, %bb
-  %tmp = phi i32 [ undef, %bb1 ], [ undef, %bb ]
-  %tmp2 = phi i32 [ %tmp18, %bb1 ], [ undef, %bb ]
-  %tmp3 = mul i32 undef, %tmp
+  %tmp = phi i32 [ 1, %bb1 ], [ 2, %bb ]
+  %tmp2 = phi i32 [ %tmp18, %bb1 ], [ 3, %bb ]
+  %tmp3 = mul i32 4, %tmp
   %tmp4 = mul i32 %tmp3, %tmp
   %tmp5 = mul i32 %tmp4, %tmp
   %tmp6 = mul i32 %tmp5, %tmp
@@ -60,10 +60,10 @@ define void @test_2(ptr addrspace(1) %arg, i32 %arg1) #0 {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    br label [[BB2:%.*]]
 ; CHECK:       bb2:
-; CHECK-NEXT:    [[TMP:%.*]] = phi i32 [ undef, [[BB:%.*]] ], [ undef, [[BB2]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ 0, [[BB]] ], [ undef, [[BB2]] ]
+; CHECK-NEXT:    [[TMP:%.*]] = phi i32 [ 3, [[BB:%.*]] ], [ 3, [[BB2]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ 0, [[BB]] ], [ 3, [[BB2]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = mul i32 [[TMP]], 8
-; CHECK-NEXT:    [[OP_RDX:%.*]] = add i32 undef, [[TMP0]]
+; CHECK-NEXT:    [[OP_RDX:%.*]] = add i32 27, [[TMP0]]
 ; CHECK-NEXT:    call void @use(i32 [[OP_RDX]])
 ; CHECK-NEXT:    br label [[BB2]]
 ;
@@ -71,24 +71,24 @@ bb:
   br label %bb2
 
 bb2:                                              ; preds = %bb2, %bb
-  %tmp = phi i32 [ undef, %bb ], [ undef, %bb2 ]
-  %tmp3 = phi i32 [ 0, %bb ], [ undef, %bb2 ]
-  %tmp4 = add i32 %tmp, undef
-  %tmp5 = add i32 undef, %tmp4
+  %tmp = phi i32 [ 3, %bb ], [ 3, %bb2 ]
+  %tmp3 = phi i32 [ 0, %bb ], [ 3, %bb2 ]
+  %tmp4 = add i32 %tmp, 3
+  %tmp5 = add i32 3, %tmp4
   %tmp6 = add i32 %tmp, %tmp5
-  %tmp7 = add i32 undef, %tmp6
+  %tmp7 = add i32 3, %tmp6
   %tmp8 = add i32 %tmp, %tmp7
-  %tmp9 = add i32 undef, %tmp8
+  %tmp9 = add i32 3, %tmp8
   %tmp10 = add i32 %tmp, %tmp9
-  %tmp11 = add i32 undef, %tmp10
+  %tmp11 = add i32 3, %tmp10
   %tmp12 = add i32 %tmp, %tmp11
-  %tmp13 = add i32 undef, %tmp12
+  %tmp13 = add i32 3, %tmp12
   %tmp14 = add i32 %tmp, %tmp13
-  %tmp15 = add i32 undef, %tmp14
+  %tmp15 = add i32 3, %tmp14
   %tmp16 = add i32 %tmp, %tmp15
-  %tmp17 = add i32 undef, %tmp16
+  %tmp17 = add i32 3, %tmp16
   %tmp18 = add i32 %tmp, %tmp17
-  %tmp19 = add i32 undef, %tmp18
+  %tmp19 = add i32 3, %tmp18
   call void @use(i32 %tmp19)
   br label %bb2
 }
@@ -103,8 +103,8 @@ define i64 @test_3() #0 {
 ; CHECK:       bb2:
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    [[VAL:%.*]] = phi i32 [ undef, [[BB1]] ], [ undef, [[BB2:%.*]] ]
-; CHECK-NEXT:    [[VAL4:%.*]] = phi i32 [ undef, [[BB1]] ], [ undef, [[BB2]] ]
+; CHECK-NEXT:    [[VAL:%.*]] = phi i32 [ 3, [[BB1]] ], [ 3, [[BB2:%.*]] ]
+; CHECK-NEXT:    [[VAL4:%.*]] = phi i32 [ 3, [[BB1]] ], [ 3, [[BB2]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <32 x i32> poison, i32 [[VAL4]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> poison, <32 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> [[TMP1]])
@@ -136,7 +136,7 @@ define i64 @test_3() #0 {
 ; CHECK-NEXT:    [[OP_RDX25:%.*]] = mul i32 [[OP_RDX21]], [[OP_RDX22]]
 ; CHECK-NEXT:    [[OP_RDX26:%.*]] = mul i32 [[OP_RDX23]], [[OP_RDX24]]
 ; CHECK-NEXT:    [[OP_RDX27:%.*]] = mul i32 [[OP_RDX25]], [[OP_RDX26]]
-; CHECK-NEXT:    [[VAL64:%.*]] = add i32 undef, [[OP_RDX27]]
+; CHECK-NEXT:    [[VAL64:%.*]] = add i32 3, [[OP_RDX27]]
 ; CHECK-NEXT:    [[VAL65:%.*]] = sext i32 [[VAL64]] to i64
 ; CHECK-NEXT:    ret i64 [[VAL65]]
 ;
@@ -150,8 +150,8 @@ bb2:                                              ; No predecessors!
   br label %bb3
 
 bb3:                                              ; preds = %bb2, %bb1
-  %val = phi i32 [ undef, %bb1 ], [ undef, %bb2 ]
-  %val4 = phi i32 [ undef, %bb1 ], [ undef, %bb2 ]
+  %val = phi i32 [ 3, %bb1 ], [ 3, %bb2 ]
+  %val4 = phi i32 [ 3, %bb1 ], [ 3, %bb2 ]
   %val5 = mul i32 %val, %val4
   %val6 = mul i32 %val5, %val4
   %val7 = mul i32 %val6, %val4
@@ -211,7 +211,7 @@ bb3:                                              ; preds = %bb2, %bb1
   %val61 = mul i32 %val60, %val4
   %val62 = mul i32 %val61, %val4
   %val63 = mul i32 %val62, %val4
-  %val64 = add i32 undef, %val63
+  %val64 = add i32 3, %val63
   %val65 = sext i32 %val64 to i64
   ret i64 %val65
 }
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll
index 6d22bb06d5e03..70b7f14a3a2c9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll
@@ -4,11 +4,10 @@
 define void @test() {
 ; CHECK-LABEL: define void @test() {
 ; CHECK-NEXT:    [[XOR108_I_I_I:%.*]] = xor i64 0, 1
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i64> <i64 0, i64 0, i64 poison, i64 0>, i64 [[XOR108_I_I_I]], i32 2
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr <4 x i64> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <12 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison, i64 0>, i64 [[XOR108_I_I_I]], i32 10
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr <12 x i64> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <16 x i64> poison, i64 [[XOR108_I_I_I]], i32 3
-; CHECK-NEXT:    [[TMP4:%.*]] = call <16 x i64> @llvm.vector.insert.v16i64.v8i64(<16 x i64> poison, <8 x i64> zeroinitializer, i64 0)
-; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x i64> @llvm.vector.insert.v16i64.v4i64(<16 x i64> [[TMP4]], <4 x i64> [[TMP2]], i64 8)
+; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x i64> @llvm.vector.insert.v16i64.v12i64(<16 x i64> poison, <12 x i64> [[TMP2]], i64 0)
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <16 x i64> [[TMP5]], <16 x i64> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <16 x i64> [[TMP6]], <16 x i64> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 3, i32 7, i32 8, i32 9, i32 3, i32 10, i32 11, i32 12, i32 3>
 ; CHECK-NEXT:    [[TMP8:%.*]] = trunc <16 x i64> [[TMP7]] to <16 x i1>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-extracted-and-externally-used.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-extracted-and-externally-used.ll
index bb7964146c44d..d1617c9a382d1 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-extracted-and-externally-used.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-extracted-and-externally-used.ll
@@ -8,23 +8,23 @@ define void @test(i32 %arg) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[ARG]], i32 0
 ; CHECK-NEXT:    br label %[[BB1:.*]]
 ; CHECK:       [[BB1]]:
-; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP5:%.*]], %[[BB1]] ]
-; CHECK-NEXT:    [[PHI2:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP6:%.*]], %[[BB1]] ]
+; CHECK-NEXT:    [[PHI2:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP5:%.*]], %[[BB1]] ]
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP6:%.*]], %[[BB1]] ]
 ; CHECK-NEXT:    [[PHI3:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[OP_RDX4:%.*]], %[[BB1]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP4:%.*]], %[[BB1]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0>
 ; CHECK-NEXT:    [[ADD17:%.*]] = add i32 [[PHI]], 0
-; CHECK-NEXT:    [[ADD18:%.*]] = add i32 [[PHI2]], 0
+; CHECK-NEXT:    [[ADD4:%.*]] = add i32 [[PHI]], 0
 ; CHECK-NEXT:    [[ADD19:%.*]] = add i32 [[PHI2]], 0
-; CHECK-NEXT:    [[ADD23:%.*]] = add i32 [[PHI2]], 0
+; CHECK-NEXT:    [[ADD6:%.*]] = add i32 [[PHI]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i32> [[TMP2]], zeroinitializer
 ; CHECK-NEXT:    [[TMP4]] = add <2 x i32> [[TMP0]], <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP5]] = extractelement <2 x i32> [[TMP4]], i32 1
 ; CHECK-NEXT:    [[TMP6]] = extractelement <2 x i32> [[TMP4]], i32 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> [[TMP3]])
-; CHECK-NEXT:    [[OP_RDX:%.*]] = xor i32 [[TMP7]], [[ADD18]]
-; CHECK-NEXT:    [[OP_RDX1:%.*]] = xor i32 [[ADD17]], [[ADD19]]
-; CHECK-NEXT:    [[OP_RDX2:%.*]] = xor i32 [[ADD23]], [[TMP6]]
+; CHECK-NEXT:    [[OP_RDX:%.*]] = xor i32 [[TMP7]], [[ADD17]]
+; CHECK-NEXT:    [[OP_RDX1:%.*]] = xor i32 [[ADD4]], [[ADD6]]
+; CHECK-NEXT:    [[OP_RDX2:%.*]] = xor i32 [[ADD19]], [[TMP6]]
 ; CHECK-NEXT:    [[OP_RDX3:%.*]] = xor i32 [[OP_RDX]], [[OP_RDX1]]
 ; CHECK-NEXT:    [[OP_RDX4]] = xor i32 [[OP_RDX3]], [[OP_RDX2]]
 ; CHECK-NEXT:    [[ICMP:%.*]] = icmp ult i32 [[TMP5]], 0
diff --git a/llvm/test/Transforms/SLPVectorizer/extract-many-users-buildvector.ll b/llvm/test/Transforms/SLPVectorizer/extract-many-users-buildvector.ll
index 40568f9c8a509..c30f94159916a 100644
--- a/llvm/test/Transforms/SLPVectorizer/extract-many-users-buildvector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/extract-many-users-buildvector.ll
@@ -7,50 +7,52 @@ define i1 @test(float %0, double %1) {
 ; X86-SAME: (float [[TMP0:%.*]], double [[TMP1:%.*]]) {
 ; X86-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float poison>, float [[TMP0]], i32 3
 ; X86-NEXT:    [[TMP4:%.*]] = fpext <4 x float> [[TMP3]] to <4 x double>
-; X86-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> <double poison, double 0.000000e+00>, double [[TMP1]], i32 0
-; X86-NEXT:    [[TMP6:%.*]] = fmul <2 x double> zeroinitializer, [[TMP5]]
-; X86-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP6]], <4 x i32> <i32 poison, i32 0, i32 3, i32 3>
-; X86-NEXT:    [[TMP8:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> <double 0.000000e+00, double poison, double poison, double poison>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
-; X86-NEXT:    [[TMP9:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> <double poison, double poison, double poison, double 0.000000e+00>, <4 x i32> <i32 2, i32 0, i32 1, i32 7>
-; X86-NEXT:    [[TMP10:%.*]] = fmul <4 x double> [[TMP8]], [[TMP9]]
-; X86-NEXT:    [[TMP11:%.*]] = fmul <4 x double> zeroinitializer, [[TMP4]]
-; X86-NEXT:    [[TMP12:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v4f64(<8 x double> <double poison, double poison, double poison, double poison, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00>, <4 x double> [[TMP10]], i64 0)
-; X86-NEXT:    [[TMP13:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v4f64(<8 x double> <double poison, double poison, double poison, double poison, double poison, double poison, double 0.000000e+00, double 0.000000e+00>, <4 x double> [[TMP11]], i64 0)
-; X86-NEXT:    [[TMP14:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v2f64(<8 x double> [[TMP13]], <2 x double> [[TMP6]], i64 4)
-; X86-NEXT:    [[TMP15:%.*]] = fsub <8 x double> [[TMP12]], [[TMP14]]
-; X86-NEXT:    [[TMP16:%.*]] = fmul <8 x double> [[TMP12]], [[TMP14]]
-; X86-NEXT:    [[TMP17:%.*]] = shufflevector <8 x double> [[TMP15]], <8 x double> [[TMP16]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 13, i32 14, i32 15>
-; X86-NEXT:    [[TMP18:%.*]] = fptrunc <8 x double> [[TMP17]] to <8 x float>
-; X86-NEXT:    [[TMP19:%.*]] = fmul <8 x float> [[TMP18]], zeroinitializer
-; X86-NEXT:    [[TMP20:%.*]] = fcmp oeq <8 x float> [[TMP19]], zeroinitializer
-; X86-NEXT:    [[TMP21:%.*]] = freeze <8 x i1> [[TMP20]]
-; X86-NEXT:    [[TMP22:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP21]])
-; X86-NEXT:    ret i1 [[TMP22]]
+; X86-NEXT:    [[TMP5:%.*]] = insertelement <6 x double> <double poison, double poison, double poison, double poison, double poison, double 0.000000e+00>, double [[TMP1]], i32 4
+; X86-NEXT:    [[TMP6:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison>
+; X86-NEXT:    [[TMP7:%.*]] = shufflevector <6 x double> [[TMP5]], <6 x double> [[TMP6]], <6 x i32> <i32 6, i32 7, i32 8, i32 9, i32 4, i32 5>
+; X86-NEXT:    [[TMP8:%.*]] = fmul <6 x double> zeroinitializer, [[TMP7]]
+; X86-NEXT:    [[TMP9:%.*]] = shufflevector <6 x double> [[TMP7]], <6 x double> [[TMP8]], <4 x i32> <i32 poison, i32 4, i32 11, i32 11>
+; X86-NEXT:    [[TMP10:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> <double 0.000000e+00, double poison, double poison, double poison>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+; X86-NEXT:    [[TMP11:%.*]] = shufflevector <6 x double> [[TMP7]], <6 x double> poison, <4 x i32> <i32 2, i32 0, i32 1, i32 poison>
+; X86-NEXT:    [[TMP12:%.*]] = shufflevector <4 x double> [[TMP11]], <4 x double> <double poison, double poison, double poison, double 0.000000e+00>, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; X86-NEXT:    [[TMP13:%.*]] = fmul <4 x double> [[TMP10]], [[TMP12]]
+; X86-NEXT:    [[TMP14:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v4f64(<8 x double> <double poison, double poison, double poison, double poison, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00>, <4 x double> [[TMP13]], i64 0)
+; X86-NEXT:    [[TMP15:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v6f64(<8 x double> <double poison, double poison, double poison, double poison, double poison, double poison, double 0.000000e+00, double 0.000000e+00>, <6 x double> [[TMP8]], i64 0)
+; X86-NEXT:    [[TMP16:%.*]] = fsub <8 x double> [[TMP14]], [[TMP15]]
+; X86-NEXT:    [[TMP17:%.*]] = fmul <8 x double> [[TMP14]], [[TMP15]]
+; X86-NEXT:    [[TMP18:%.*]] = shufflevector <8 x double> [[TMP16]], <8 x double> [[TMP17]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 13, i32 14, i32 15>
+; X86-NEXT:    [[TMP19:%.*]] = fptrunc <8 x double> [[TMP18]] to <8 x float>
+; X86-NEXT:    [[TMP20:%.*]] = fmul <8 x float> [[TMP19]], zeroinitializer
+; X86-NEXT:    [[TMP21:%.*]] = fcmp oeq <8 x float> [[TMP20]], zeroinitializer
+; X86-NEXT:    [[TMP22:%.*]] = freeze <8 x i1> [[TMP21]]
+; X86-NEXT:    [[TMP23:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP22]])
+; X86-NEXT:    ret i1 [[TMP23]]
 ;
 ; AARCH64-LABEL: define i1 @test
 ; AARCH64-SAME: (float [[TMP0:%.*]], double [[TMP1:%.*]]) {
 ; AARCH64-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float poison>, float [[TMP0]], i32 3
 ; AARCH64-NEXT:    [[TMP4:%.*]] = fpext <4 x float> [[TMP3]] to <4 x double>
-; AARCH64-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> <double poison, double 0.000000e+00>, double [[TMP1]], i32 0
-; AARCH64-NEXT:    [[TMP6:%.*]] = fmul <2 x double> zeroinitializer, [[TMP5]]
-; AARCH64-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP6]], <4 x i32> <i32 poison, i32 0, i32 3, i32 3>
-; AARCH64-NEXT:    [[TMP8:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> <double 0.000000e+00, double poison, double poison, double poison>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
-; AARCH64-NEXT:    [[TMP9:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> <double poison, double poison, double poison, double 0.000000e+00>, <4 x i32> <i32 2, i32 0, i32 poison, i32 7>
-; AARCH64-NEXT:    [[TMP10:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 5, i32 3>
-; AARCH64-NEXT:    [[TMP11:%.*]] = fmul <4 x double> [[TMP8]], [[TMP10]]
-; AARCH64-NEXT:    [[TMP12:%.*]] = fmul <4 x double> zeroinitializer, [[TMP4]]
-; AARCH64-NEXT:    [[TMP13:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v4f64(<8 x double> <double poison, double poison, double poison, double poison, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00>, <4 x double> [[TMP11]], i64 0)
-; AARCH64-NEXT:    [[TMP14:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v4f64(<8 x double> <double poison, double poison, double poison, double poison, double poison, double poison, double 0.000000e+00, double 0.000000e+00>, <4 x double> [[TMP12]], i64 0)
-; AARCH64-NEXT:    [[TMP15:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v2f64(<8 x double> [[TMP14]], <2 x double> [[TMP6]], i64 4)
-; AARCH64-NEXT:    [[TMP16:%.*]] = fsub <8 x double> [[TMP13]], [[TMP15]]
-; AARCH64-NEXT:    [[TMP17:%.*]] = fmul <8 x double> [[TMP13]], [[TMP15]]
-; AARCH64-NEXT:    [[TMP18:%.*]] = shufflevector <8 x double> [[TMP16]], <8 x double> [[TMP17]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 13, i32 14, i32 15>
-; AARCH64-NEXT:    [[TMP19:%.*]] = fptrunc <8 x double> [[TMP18]] to <8 x float>
-; AARCH64-NEXT:    [[TMP20:%.*]] = fmul <8 x float> [[TMP19]], zeroinitializer
-; AARCH64-NEXT:    [[TMP21:%.*]] = fcmp oeq <8 x float> [[TMP20]], zeroinitializer
-; AARCH64-NEXT:    [[TMP22:%.*]] = freeze <8 x i1> [[TMP21]]
-; AARCH64-NEXT:    [[TMP23:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP22]])
-; AARCH64-NEXT:    ret i1 [[TMP23]]
+; AARCH64-NEXT:    [[TMP5:%.*]] = insertelement <6 x double> <double poison, double poison, double poison, double poison, double poison, double 0.000000e+00>, double [[TMP1]], i32 4
+; AARCH64-NEXT:    [[TMP6:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison>
+; AARCH64-NEXT:    [[TMP7:%.*]] = shufflevector <6 x double> [[TMP5]], <6 x double> [[TMP6]], <6 x i32> <i32 6, i32 7, i32 8, i32 9, i32 4, i32 5>
+; AARCH64-NEXT:    [[TMP8:%.*]] = fmul <6 x double> zeroinitializer, [[TMP7]]
+; AARCH64-NEXT:    [[TMP9:%.*]] = shufflevector <6 x double> [[TMP7]], <6 x double> [[TMP8]], <4 x i32> <i32 poison, i32 4, i32 11, i32 11>
+; AARCH64-NEXT:    [[TMP10:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> <double 0.000000e+00, double poison, double poison, double poison>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+; AARCH64-NEXT:    [[TMP11:%.*]] = shufflevector <6 x double> [[TMP7]], <6 x double> poison, <4 x i32> <i32 2, i32 0, i32 poison, i32 poison>
+; AARCH64-NEXT:    [[TMP12:%.*]] = shufflevector <4 x double> [[TMP11]], <4 x double> <double poison, double poison, double poison, double 0.000000e+00>, <4 x i32> <i32 0, i32 1, i32 poison, i32 7>
+; AARCH64-NEXT:    [[TMP13:%.*]] = shufflevector <4 x double> [[TMP12]], <4 x double> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 5, i32 3>
+; AARCH64-NEXT:    [[TMP14:%.*]] = fmul <4 x double> [[TMP10]], [[TMP13]]
+; AARCH64-NEXT:    [[TMP15:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v4f64(<8 x double> <double poison, double poison, double poison, double poison, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00>, <4 x double> [[TMP14]], i64 0)
+; AARCH64-NEXT:    [[TMP16:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v6f64(<8 x double> <double poison, double poison, double poison, double poison, double poison, double poison, double 0.000000e+00, double 0.000000e+00>, <6 x double> [[TMP8]], i64 0)
+; AARCH64-NEXT:    [[TMP17:%.*]] = fsub <8 x double> [[TMP15]], [[TMP16]]
+; AARCH64-NEXT:    [[TMP18:%.*]] = fmul <8 x double> [[TMP15]], [[TMP16]]
+; AARCH64-NEXT:    [[TMP19:%.*]] = shufflevector <8 x double> [[TMP17]], <8 x double> [[TMP18]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 13, i32 14, i32 15>
+; AARCH64-NEXT:    [[TMP20:%.*]] = fptrunc <8 x double> [[TMP19]] to <8 x float>
+; AARCH64-NEXT:    [[TMP21:%.*]] = fmul <8 x float> [[TMP20]], zeroinitializer
+; AARCH64-NEXT:    [[TMP22:%.*]] = fcmp oeq <8 x float> [[TMP21]], zeroinitializer
+; AARCH64-NEXT:    [[TMP23:%.*]] = freeze <8 x i1> [[TMP22]]
+; AARCH64-NEXT:    [[TMP24:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP23]])
+; AARCH64-NEXT:    ret i1 [[TMP24]]
 ;
   %3 = fpext float %0 to double
   %4 = fpext float 0.000000e+00 to double
diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
index 5001ffdeab9e2..bba92befc089b 100644
--- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
+++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
@@ -1,5 +1,7 @@
 ## Test that reconstructs a dSYM file from YAML and generates a gsym from it. The gsym has callsite info and merged functions.
 
+# TODO: Add line numbers instead of ':{{.}}' below - after introducing accurate DIE-based lookups: https://github.com/llvm/llvm-project/pull/123391
+
 # RUN: split-file %s %t
 # RUN: yaml2obj %t/merged_callsites.dSYM.yaml -o %t/merged_callsites.dSYM
 
@@ -42,6 +44,50 @@
 # CHECK-MERGED-CALLSITES-NEXT:   0x[[#%.4x,]] Flags[None] MatchRegex[function3_copy2]
 # CHECK-MERGED-CALLSITES-NEXT:   0x[[#%.4x,]] Flags[None] MatchRegex[function2_copy1]
 
+
+### Check that we can correctly resolve merged functions using callstacks:
+### Resolve two callstacks containing merged functions.
+### We use the value obtained from `CallSites:[FILTER]` to pass to the next call to `llvm-gsymutil` via `--merged-functions-filter`.
+### The callstacks resolve differently based on the merged functions filter.
+###     0x00000001000003d0  =>  0x000000010000037c  =>  0x000000010000035c  =>  0x0000000100000340
+###     0x00000001000003e8  =========================>  0x000000010000035c  =>  0x0000000100000340
+
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x00000001000003d0 | FileCheck --check-prefix=CHECK-C1 %s
+# CHECK-C1:       0x00000001000003d0: main + 32 @ /tmp/tst{{[/\\]}}out/merged_funcs_test.cpp:63
+# CHECK-C1-NEXT:      CallSites: function2_copy2
+
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x000000010000037c --merged-functions-filter="function2_copy2" | FileCheck --check-prefix=CHECK-C2 %s
+# CHECK-C2:       0x000000010000037c: function_inlined + 8 @ /tmp/tst{{[/\\]}}out/merged_funcs_test.cpp:35 [inlined]
+# CHECK-C2-NEXT:                   function2_copy2 + 16 @ /tmp/tst{{[/\\]}}out/merged_funcs_test.cpp:48
+# CHECK-C2-NEXT:     CallSites: function3_copy1
+
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x000000010000035c --merged-functions-filter="function3_copy1" | FileCheck --check-prefix=CHECK-C3 %s
+# CHECK-C3:       Found 1 function at address 0x000000010000035c:
+# CHECK-C3-NEXT:     0x000000010000035c: function3_copy1 + 16 @ /tmp/tst{{[/\\]}}out/merged_funcs_test.cpp:{{.}}
+# CHECK-C3-NEXT:        CallSites: function4_copy1
+
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x0000000100000340 --merged-functions-filter="function4_copy1" | FileCheck --check-prefix=CHECK-C4 %s
+# CHECK-C4:       Found 1 function at address 0x0000000100000340:
+# CHECK-C4-NEXT:     0x0000000100000340: function4_copy1 + 8 @ /tmp/tst{{[/\\]}}out/merged_funcs_test.cpp:{{.}}
+
+### ----------------------------------------------------------------------------------------------------------------------------------
+### Resolve the 2nd call stack - the 2nd and 3rd addresses are the same but they resolve to a different function because of the filter
+
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --address=0x00000001000003e8 --merged-functions | FileCheck --check-prefix=CHECK-C5 %s
+# CHECK-C5:       Found 1 function at address 0x00000001000003e8:
+# CHECK-C5-NEXT:     0x00000001000003e8: main + 56 @ /tmp/tst{{[/\\]}}out/merged_funcs_test.cpp:64
+# CHECK-C5-NEXT:        CallSites: function3_copy2
+
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x000000010000035c --merged-functions-filter="function3_copy2" | FileCheck --check-prefix=CHECK-C6 %s
+# CHECK-C6:       Found 1 function at address 0x000000010000035c:
+# CHECK-C6-NEXT:     0x000000010000035c: function3_copy2 + 16 @ /tmp/tst{{[/\\]}}out/merged_funcs_test.cpp:{{.}}
+# CHECK-C6-NEXT:        CallSites: function4_copy2
+
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --merged-functions-filter="function4_copy2" --address=0x0000000100000340 | FileCheck --check-prefix=CHECK-C7 %s
+# CHECK-C7:       Found 1 function at address 0x0000000100000340:
+# CHECK-C7-NEXT:     0x0000000100000340: function4_copy2 + 8 @ /tmp/tst{{[/\\]}}out/merged_funcs_test.cpp:{{.}}
+
+
 #--- merged_funcs_test.cpp
 #define ATTRIB extern "C" __attribute__((noinline))
 volatile int global_result = 0;
diff --git a/llvm/test/tools/llvm-lib/member-names.test b/llvm/test/tools/llvm-lib/member-names.test
new file mode 100644
index 0000000000000..75437a02f30a2
--- /dev/null
+++ b/llvm/test/tools/llvm-lib/member-names.test
@@ -0,0 +1,37 @@
+RUN: rm -rf %t
+RUN: mkdir -p %t/foo
+RUN: split-file %s %t
+RUN: cd %t
+
+RUN: llvm-mc -triple=x86_64-pc-windows-msvc -filetype=obj -o foo/obj.o %S/Inputs/a.s
+
+# For a regular, non-thin archive, check that we store the path in the form
+# it was passed (foo/obj.o), not as a path relative to the archive.
+RUN: llvm-lib -out:foo/regular.a foo/obj.o
+RUN: llvm-lib -list foo/regular.a | FileCheck %s --check-prefix=REGULAR --match-full-lines
+REGULAR: foo/obj.o
+
+# When merging two import libraries, make sure that the member names stay
+# unchanged.
+RUN: llvm-lib -machine:x64 -out:foo.lib -def:foo.def
+RUN: llvm-lib -machine:x64 -out:bar.lib -def:bar.def
+RUN: llvm-lib -out:foo/merged.lib foo.lib bar.lib
+RUN: llvm-lib -list foo/merged.lib | FileCheck %s --check-prefix=MERGED --match-full-lines
+MERGED: foo.dll
+MERGED: foo.dll
+MERGED: foo.dll
+MERGED: foo.dll
+MERGED: bar.dll
+MERGED: bar.dll
+MERGED: bar.dll
+MERGED: bar.dll
+
+#--- foo.def
+LIBRARY foo.dll
+EXPORTS
+        func1
+
+#--- bar.def
+LIBRARY bar.dll
+EXPORTS
+        func2
diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFive7/jump.s b/llvm/test/tools/llvm-mca/RISCV/SiFive7/jump.s
new file mode 100644
index 0000000000000..f23a9ccfca3be
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/SiFive7/jump.s
@@ -0,0 +1,78 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-u74 -timeline -iterations=1 < %s \
+# RUN:   | FileCheck %s
+
+jal x0, 1f
+1:
+jal a0, 1f
+1:
+jalr x0, a0
+jalr t0, a0
+ret
+
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      5
+# CHECK-NEXT: Total Cycles:      200
+# CHECK-NEXT: Total uOps:        5
+
+# CHECK:      Dispatch Width:    2
+# CHECK-NEXT: uOps Per Cycle:    0.03
+# CHECK-NEXT: IPC:               0.03
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      3     1.00                        j	.Ltmp0
+# CHECK-NEXT:  1      3     1.00                        jal	a0, .Ltmp1
+# CHECK-NEXT:  1      3     1.00                        jr	a0
+# CHECK-NEXT:  1      3     1.00                        jalr	t0, a0
+# CHECK-NEXT:  1      3     1.00                        ret
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - SiFive7FDiv
+# CHECK-NEXT: [1]   - SiFive7IDiv
+# CHECK-NEXT: [2]   - SiFive7PipeA
+# CHECK-NEXT: [3]   - SiFive7PipeB
+# CHECK-NEXT: [4]   - SiFive7VA
+# CHECK-NEXT: [5]   - SiFive7VCQ
+# CHECK-NEXT: [6]   - SiFive7VL
+# CHECK-NEXT: [7]   - SiFive7VS
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]
+# CHECK-NEXT:  -      -      -     5.00    -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    Instructions:
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     j	.Ltmp0
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     jal	a0, .Ltmp1
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     jr	a0
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     jalr	t0, a0
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     ret
+
+# CHECK:      Timeline view:
+# CHECK-NEXT: Index     0123
+
+# CHECK:      [0,0]     DeeE   j	.Ltmp0
+# CHECK-NEXT: Truncated display due to cycle limit
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     0.0    0.0    0.0       j	.Ltmp0
+# CHECK-NEXT: 1.     1     0.0    0.0    0.0       jal	a0, .Ltmp1
+# CHECK-NEXT: 2.     1     0.0    0.0    0.0       jr	a0
+# CHECK-NEXT: 3.     1     0.0    0.0    0.0       jalr	t0, a0
+# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ret
+# CHECK-NEXT:        1     0.0    0.0    0.0       <total>
diff --git a/llvm/tools/llvm-gsymutil/Opts.td b/llvm/tools/llvm-gsymutil/Opts.td
index 89cd3ce6fc413..15bc064ba6f2c 100644
--- a/llvm/tools/llvm-gsymutil/Opts.td
+++ b/llvm/tools/llvm-gsymutil/Opts.td
@@ -46,3 +46,8 @@ def addresses_from_stdin :
 defm json_summary_file :
   Eq<"json-summary-file",
      "Output a categorized summary of errors into the JSON file specified.">;
+defm merged_functions_filter : 
+  Eq<"merged-functions-filter", 
+     "When used with --address/--addresses-from-stdin and --merged-functions,\n"
+     "filters the merged functions output to only show functions matching any of the specified regex patterns.\n"
+     "Can be specified multiple times.">;
diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
index 654da68bb6960..84934976be2c8 100644
--- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
+++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
@@ -101,6 +101,7 @@ static bool LookupAddressesFromStdin;
 static bool UseMergedFunctions = false;
 static bool LoadDwarfCallSites = false;
 static std::string CallSiteYamlPath;
+static std::vector<std::string> MergedFunctionsFilters;
 
 static void parseArgs(int argc, char **argv) {
   GSYMUtilOptTable Tbl;
@@ -194,6 +195,24 @@ static void parseArgs(int argc, char **argv) {
   }
 
   LoadDwarfCallSites = Args.hasArg(OPT_dwarf_callsites);
+
+  for (const llvm::opt::Arg *A :
+       Args.filtered(OPT_merged_functions_filter_EQ)) {
+    MergedFunctionsFilters.push_back(A->getValue());
+    // Validate the filter is only used with correct flags
+    if (LookupAddresses.empty() && !LookupAddressesFromStdin) {
+      llvm::errs() << ToolName
+                   << ": --merged-functions-filter can only be used with "
+                      "--address/--addresses-from-stdin\n";
+      std::exit(1);
+    }
+    if (!UseMergedFunctions) {
+      llvm::errs()
+          << ToolName
+          << ": --merged-functions-filter requires --merged-functions\n";
+      std::exit(1);
+    }
+  }
 }
 
 /// @}
@@ -510,9 +529,43 @@ static llvm::Error convertFileToGSYM(OutputAggregator &Out) {
 static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
   if (UseMergedFunctions) {
     if (auto Results = Gsym.lookupAll(Addr)) {
-      OS << "Found " << Results->size() << " functions at address "
-         << HEX64(Addr) << ":\n";
+      // If we have filters, count matching results first
+      size_t NumMatching = Results->size();
+      if (!MergedFunctionsFilters.empty()) {
+        NumMatching = 0;
+        for (const auto &Result : *Results) {
+          bool Matches = false;
+          for (const auto &Filter : MergedFunctionsFilters) {
+            Regex Pattern(Filter);
+            if (Pattern.match(Result.FuncName)) {
+              Matches = true;
+              break;
+            }
+          }
+          if (Matches)
+            NumMatching++;
+        }
+      }
+
+      OS << "Found " << NumMatching << " function"
+         << (NumMatching != 1 ? "s" : "") << " at address " << HEX64(Addr)
+         << ":\n";
+
       for (size_t i = 0; i < Results->size(); ++i) {
+        // Skip if doesn't match any filter
+        if (!MergedFunctionsFilters.empty()) {
+          bool Matches = false;
+          for (const auto &Filter : MergedFunctionsFilters) {
+            Regex Pattern(Filter);
+            if (Pattern.match(Results->at(i).FuncName)) {
+              Matches = true;
+              break;
+            }
+          }
+          if (!Matches)
+            continue;
+        }
+
         OS << "   " << Results->at(i);
 
         if (i != Results->size() - 1)
@@ -529,6 +582,8 @@ static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
           OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
         }
       }
+      // Don't print call site info if --merged-functions is not specified.
+      Result->CallSiteFuncRegex.clear();
       OS << Result.get();
     } else {
       if (Verbose)
diff --git a/llvm/unittests/ExecutionEngine/Orc/ExecutorAddressTest.cpp b/llvm/unittests/ExecutionEngine/Orc/ExecutorAddressTest.cpp
index e8b22b3d4bbb7..3de77031291c5 100644
--- a/llvm/unittests/ExecutionEngine/Orc/ExecutorAddressTest.cpp
+++ b/llvm/unittests/ExecutionEngine/Orc/ExecutorAddressTest.cpp
@@ -8,6 +8,7 @@
 
 #include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
 #include "OrcTestCommon.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h"
 
 using namespace llvm;
 using namespace llvm::orc;
@@ -107,4 +108,35 @@ TEST(ExecutorAddrTest, AddrRanges) {
   EXPECT_GT(R1, R0);
 }
 
+TEST(ExecutorSymbolDef, PointerConversion) {
+  int X = 0;
+
+  auto XHiddenSym = ExecutorSymbolDef::fromPtr(&X);
+  int *XHiddenPtr = XHiddenSym.toPtr<int *>();
+
+  auto XExportedSym = ExecutorSymbolDef::fromPtr(&X, JITSymbolFlags::Exported);
+  int *XExportedPtr = XExportedSym.toPtr<int *>();
+
+  EXPECT_EQ(XHiddenPtr, &X);
+  EXPECT_EQ(XExportedPtr, &X);
+
+  EXPECT_EQ(XHiddenSym.getFlags(), JITSymbolFlags());
+  EXPECT_EQ(XExportedSym.getFlags(), JITSymbolFlags::Exported);
+}
+
+TEST(ExecutorSymbolDef, FunctionPointerConversion) {
+  auto FHiddenSym = ExecutorSymbolDef::fromPtr(&F);
+  void (*FHiddenPtr)() = FHiddenSym.toPtr<void()>();
+
+  auto FExportedSym = ExecutorSymbolDef::fromPtr(&F, JITSymbolFlags::Exported);
+  void (*FExportedPtr)() = FExportedSym.toPtr<void()>();
+
+  EXPECT_EQ(FHiddenPtr, &F);
+  EXPECT_EQ(FExportedPtr, &F);
+
+  EXPECT_EQ(FHiddenSym.getFlags(), JITSymbolFlags::Callable);
+  EXPECT_EQ(FExportedSym.getFlags(),
+            JITSymbolFlags::Exported | JITSymbolFlags::Callable);
+}
+
 } // namespace
diff --git a/llvm/unittests/ExecutionEngine/Orc/IndirectionUtilsTest.cpp b/llvm/unittests/ExecutionEngine/Orc/IndirectionUtilsTest.cpp
index 9b2a66353260d..f4b22e7ec4da5 100644
--- a/llvm/unittests/ExecutionEngine/Orc/IndirectionUtilsTest.cpp
+++ b/llvm/unittests/ExecutionEngine/Orc/IndirectionUtilsTest.cpp
@@ -19,7 +19,7 @@ TEST(IndirectionUtilsTest, MakeStub) {
   LLVMContext Context;
   ModuleBuilder MB(Context, "x86_64-apple-macosx10.10", "");
   StructType *ArgTy = getDummyStructTy(Context);
-  Type *ArgPtrTy = PointerType::getUnqual(ArgTy);
+  Type *ArgPtrTy = PointerType::getUnqual(Context);
   FunctionType *FTy = FunctionType::get(
       Type::getVoidTy(Context), {ArgPtrTy, ArgPtrTy}, false);
   Function *F = MB.createFunctionDecl(FTy, "");
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index a7b513bdfdc66..f620d2c968b3f 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -3087,8 +3087,7 @@ TEST_F(OpenMPIRBuilderTest, CriticalDirective) {
   EXPECT_EQ(CriticalEndCI->arg_size(), 3U);
   EXPECT_TRUE(isa<GlobalVariable>(CriticalEndCI->getArgOperand(0)));
   EXPECT_EQ(CriticalEndCI->getArgOperand(1), CriticalEntryCI->getArgOperand(1));
-  PointerType *CriticalNamePtrTy =
-      PointerType::getUnqual(ArrayType::get(Type::getInt32Ty(Ctx), 8));
+  PointerType *CriticalNamePtrTy = PointerType::getUnqual(Ctx);
   EXPECT_EQ(CriticalEndCI->getArgOperand(2), CriticalEntryCI->getArgOperand(2));
   GlobalVariable *GV =
       dyn_cast<GlobalVariable>(CriticalEndCI->getArgOperand(2));
diff --git a/llvm/unittests/IR/ConstantsTest.cpp b/llvm/unittests/IR/ConstantsTest.cpp
index c8444ab7dfd3b..53ebe52b4f21b 100644
--- a/llvm/unittests/IR/ConstantsTest.cpp
+++ b/llvm/unittests/IR/ConstantsTest.cpp
@@ -184,9 +184,9 @@ TEST(ConstantsTest, AsInstructionsTest) {
   Type *Int16Ty = Type::getInt16Ty(Context);
 
   Constant *Global =
-      M->getOrInsertGlobal("dummy", PointerType::getUnqual(Int32Ty));
+      M->getOrInsertGlobal("dummy", PointerType::getUnqual(Context));
   Constant *Global2 =
-      M->getOrInsertGlobal("dummy2", PointerType::getUnqual(Int32Ty));
+      M->getOrInsertGlobal("dummy2", PointerType::getUnqual(Context));
 
   Constant *P0 = ConstantExpr::getPtrToInt(Global, Int32Ty);
   Constant *P4 = ConstantExpr::getPtrToInt(Global2, Int32Ty);
@@ -222,7 +222,7 @@ TEST(ConstantsTest, AsInstructionsTest) {
   //        not a normal one!
   // CHECK(ConstantExpr::getGetElementPtr(Global, V, false),
   //      "getelementptr i32*, i32** @dummy, i32 1");
-  CHECK(ConstantExpr::getInBoundsGetElementPtr(PointerType::getUnqual(Int32Ty),
+  CHECK(ConstantExpr::getInBoundsGetElementPtr(PointerType::getUnqual(Context),
                                                Global, V),
         "getelementptr inbounds ptr, ptr @dummy, i32 1");
 
@@ -250,9 +250,9 @@ TEST(ConstantsTest, ReplaceWithConstantTest) {
   Constant *One = ConstantInt::get(Int32Ty, 1);
 
   Constant *Global =
-      M->getOrInsertGlobal("dummy", PointerType::getUnqual(Int32Ty));
+      M->getOrInsertGlobal("dummy", PointerType::getUnqual(Context));
   Constant *GEP = ConstantExpr::getGetElementPtr(
-      PointerType::getUnqual(Int32Ty), Global, One);
+      PointerType::getUnqual(Context), Global, One);
   EXPECT_DEATH(Global->replaceAllUsesWith(GEP),
                "this->replaceAllUsesWith\\(expr\\(this\\)\\) is NOT valid!");
 }
diff --git a/llvm/unittests/IR/DroppedVariableStatsIRTest.cpp b/llvm/unittests/IR/DroppedVariableStatsIRTest.cpp
index 34803a9771850..72d8373fc45df 100644
--- a/llvm/unittests/IR/DroppedVariableStatsIRTest.cpp
+++ b/llvm/unittests/IR/DroppedVariableStatsIRTest.cpp
@@ -79,7 +79,7 @@ TEST(DroppedVariableStatsIR, BothDeleted) {
   ASSERT_TRUE(M);
 
   DroppedVariableStatsIR Stats(true);
-  Stats.runBeforePass(llvm::Any(const_cast<const llvm::Module *>(M.get())));
+  Stats.runBeforePass("", llvm::Any(const_cast<const llvm::Module *>(M.get())));
 
   // This loop simulates an IR pass that drops debug information.
   for (auto &F : *M) {
@@ -134,7 +134,7 @@ TEST(DroppedVariableStatsIR, DbgValLost) {
   ASSERT_TRUE(M);
 
   DroppedVariableStatsIR Stats(true);
-  Stats.runBeforePass(llvm::Any(const_cast<const llvm::Module *>(M.get())));
+  Stats.runBeforePass("", llvm::Any(const_cast<const llvm::Module *>(M.get())));
 
   // This loop simulates an IR pass that drops debug information.
   for (auto &F : *M) {
@@ -189,7 +189,7 @@ TEST(DroppedVariableStatsIR, UnrelatedScopes) {
   ASSERT_TRUE(M);
 
   DroppedVariableStatsIR Stats(true);
-  Stats.runBeforePass(llvm::Any(const_cast<const llvm::Module *>(M.get())));
+  Stats.runBeforePass("", llvm::Any(const_cast<const llvm::Module *>(M.get())));
 
   // This loop simulates an IR pass that drops debug information.
   for (auto &F : *M) {
@@ -244,7 +244,7 @@ TEST(DroppedVariableStatsIR, ChildScopes) {
   ASSERT_TRUE(M);
 
   DroppedVariableStatsIR Stats(true);
-  Stats.runBeforePass(llvm::Any(const_cast<const llvm::Module *>(M.get())));
+  Stats.runBeforePass("", llvm::Any(const_cast<const llvm::Module *>(M.get())));
 
   // This loop simulates an IR pass that drops debug information.
   for (auto &F : *M) {
@@ -300,7 +300,7 @@ TEST(DroppedVariableStatsIR, InlinedAt) {
   ASSERT_TRUE(M);
 
   DroppedVariableStatsIR Stats(true);
-  Stats.runBeforePass(llvm::Any(const_cast<const llvm::Module *>(M.get())));
+  Stats.runBeforePass("", llvm::Any(const_cast<const llvm::Module *>(M.get())));
 
   // This loop simulates an IR pass that drops debug information.
   for (auto &F : *M) {
@@ -356,7 +356,7 @@ TEST(DroppedVariableStatsIR, InlinedAtShared) {
   ASSERT_TRUE(M);
 
   DroppedVariableStatsIR Stats(true);
-  Stats.runBeforePass(llvm::Any(const_cast<const llvm::Module *>(M.get())));
+  Stats.runBeforePass("", llvm::Any(const_cast<const llvm::Module *>(M.get())));
 
   // This loop simulates an IR pass that drops debug information.
   for (auto &F : *M) {
@@ -413,7 +413,7 @@ TEST(DroppedVariableStatsIR, InlinedAtChild) {
   ASSERT_TRUE(M);
 
   DroppedVariableStatsIR Stats(true);
-  Stats.runBeforePass(llvm::Any(const_cast<const llvm::Module *>(M.get())));
+  Stats.runBeforePass("", llvm::Any(const_cast<const llvm::Module *>(M.get())));
 
   // This loop simulates an IR pass that drops debug information.
   for (auto &F : *M) {
diff --git a/llvm/unittests/IR/InstructionsTest.cpp b/llvm/unittests/IR/InstructionsTest.cpp
index bbd692b69c722..c1d3279688858 100644
--- a/llvm/unittests/IR/InstructionsTest.cpp
+++ b/llvm/unittests/IR/InstructionsTest.cpp
@@ -850,8 +850,8 @@ TEST(InstructionsTest, GEPIndices) {
     Builder.getInt32(13),
     Builder.getInt32(42) };
 
-  Value *V = Builder.CreateGEP(ArrTy, UndefValue::get(PointerType::getUnqual(ArrTy)),
-                               Indices);
+  Value *V = Builder.CreateGEP(
+      ArrTy, UndefValue::get(PointerType::getUnqual(Context)), Indices);
   ASSERT_TRUE(isa<GetElementPtrInst>(V));
 
   auto *GEPI = cast<GetElementPtrInst>(V);
diff --git a/llvm/unittests/IR/PatternMatch.cpp b/llvm/unittests/IR/PatternMatch.cpp
index d06ada5953445..e7600209e0b14 100644
--- a/llvm/unittests/IR/PatternMatch.cpp
+++ b/llvm/unittests/IR/PatternMatch.cpp
@@ -2606,8 +2606,7 @@ TYPED_TEST(MutableConstTest, FCmp) {
 }
 
 TEST_F(PatternMatchTest, ConstExpr) {
-  Constant *G =
-      M->getOrInsertGlobal("dummy", PointerType::getUnqual(IRB.getInt32Ty()));
+  Constant *G = M->getOrInsertGlobal("dummy", PointerType::getUnqual(Ctx));
   Constant *S = ConstantExpr::getPtrToInt(G, IRB.getInt32Ty());
   Type *VecTy = FixedVectorType::get(IRB.getInt32Ty(), 2);
   PoisonValue *P = PoisonValue::get(VecTy);
diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp b/llvm/unittests/SandboxIR/SandboxIRTest.cpp
index 73e8ef283fc2a..9d1c86a9b9c72 100644
--- a/llvm/unittests/SandboxIR/SandboxIRTest.cpp
+++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp
@@ -5150,8 +5150,8 @@ define void @foo(i32 %arg, float %farg, double %darg, ptr %ptr) {
   auto *Ti16 = sandboxir::Type::getInt16Ty(Ctx);
   auto *Tdouble = sandboxir::Type::getDoubleTy(Ctx);
   auto *Tfloat = sandboxir::Type::getFloatTy(Ctx);
-  auto *Tptr = sandboxir::PointerType::get(Tfloat, 0);
-  auto *Tptr1 = sandboxir::PointerType::get(Tfloat, 1);
+  auto *Tptr = sandboxir::PointerType::get(Ctx, 0);
+  auto *Tptr1 = sandboxir::PointerType::get(Ctx, 1);
 
   // Check classof(), getOpcode(), getSrcTy(), getDstTy()
   auto *ZExt = cast<sandboxir::CastInst>(&*It++);
diff --git a/llvm/unittests/SandboxIR/TypesTest.cpp b/llvm/unittests/SandboxIR/TypesTest.cpp
index 6ccd08d4e710f..d87f034bd047e 100644
--- a/llvm/unittests/SandboxIR/TypesTest.cpp
+++ b/llvm/unittests/SandboxIR/TypesTest.cpp
@@ -217,10 +217,6 @@ define void @foo(ptr %ptr) {
   auto *F = Ctx.createFunction(LLVMF);
   // Check classof(), creation.
   auto *PtrTy = cast<sandboxir::PointerType>(F->getArg(0)->getType());
-  // Check get(ElementType, AddressSpace).
-  auto *NewPtrTy =
-      sandboxir::PointerType::get(sandboxir::Type::getInt32Ty(Ctx), 0u);
-  EXPECT_EQ(NewPtrTy, PtrTy);
   // Check get(Ctx, AddressSpace).
   auto *NewPtrTy2 = sandboxir::PointerType::get(Ctx, 0u);
   EXPECT_EQ(NewPtrTy2, PtrTy);
diff --git a/llvm/unittests/Target/SPIRV/SPIRVAPITest.cpp b/llvm/unittests/Target/SPIRV/SPIRVAPITest.cpp
index 27ea8b8cf06e8..149db48c190a0 100644
--- a/llvm/unittests/Target/SPIRV/SPIRVAPITest.cpp
+++ b/llvm/unittests/Target/SPIRV/SPIRVAPITest.cpp
@@ -36,7 +36,9 @@ class SPIRVAPITest : public testing::Test {
                const std::vector<std::string> &AllowExtNames,
                const std::vector<std::string> &Opts) {
     SMDiagnostic ParseError;
-    M = parseAssemblyString(Assembly, ParseError, Context);
+    LLVMContext Context;
+    std::unique_ptr<Module> M =
+        parseAssemblyString(Assembly, ParseError, Context);
     if (!M) {
       ParseError.print("IR parsing failed: ", errs());
       report_fatal_error("Can't parse input assembly.");
@@ -48,9 +50,6 @@ class SPIRVAPITest : public testing::Test {
     return Status;
   }
 
-  LLVMContext Context;
-  std::unique_ptr<Module> M;
-
   static constexpr StringRef ExtensionAssembly = R"(
     define dso_local spir_func void @test1() {
     entry:
diff --git a/llvm/utils/TableGen/CMakeLists.txt b/llvm/utils/TableGen/CMakeLists.txt
index 96a74c6fd89f7..67291214c14e6 100644
--- a/llvm/utils/TableGen/CMakeLists.txt
+++ b/llvm/utils/TableGen/CMakeLists.txt
@@ -60,6 +60,7 @@ add_tablegen(llvm-tblgen LLVM
   PseudoLoweringEmitter.cpp
   RegisterBankEmitter.cpp
   RegisterInfoEmitter.cpp
+  SDNodeInfoEmitter.cpp
   SearchableTableEmitter.cpp
   SubtargetEmitter.cpp
   WebAssemblyDisassemblerEmitter.cpp
diff --git a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp
index 013135a9def1f..59148a96d8e92 100644
--- a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp
+++ b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp
@@ -1537,21 +1537,19 @@ SDTypeConstraint::SDTypeConstraint(const Record *R, const CodeGenHwModes &CGH) {
     ConstraintType = SDTCisVec;
   } else if (R->isSubClassOf("SDTCisSameAs")) {
     ConstraintType = SDTCisSameAs;
-    x.SDTCisSameAs_Info.OtherOperandNum = R->getValueAsInt("OtherOperandNum");
+    OtherOperandNo = R->getValueAsInt("OtherOperandNum");
   } else if (R->isSubClassOf("SDTCisVTSmallerThanOp")) {
     ConstraintType = SDTCisVTSmallerThanOp;
-    x.SDTCisVTSmallerThanOp_Info.OtherOperandNum =
-        R->getValueAsInt("OtherOperandNum");
+    OtherOperandNo = R->getValueAsInt("OtherOperandNum");
   } else if (R->isSubClassOf("SDTCisOpSmallerThanOp")) {
     ConstraintType = SDTCisOpSmallerThanOp;
-    x.SDTCisOpSmallerThanOp_Info.BigOperandNum =
-        R->getValueAsInt("BigOperandNum");
+    OtherOperandNo = R->getValueAsInt("BigOperandNum");
   } else if (R->isSubClassOf("SDTCisEltOfVec")) {
     ConstraintType = SDTCisEltOfVec;
-    x.SDTCisEltOfVec_Info.OtherOperandNum = R->getValueAsInt("OtherOpNum");
+    OtherOperandNo = R->getValueAsInt("OtherOpNum");
   } else if (R->isSubClassOf("SDTCisSubVecOfVec")) {
     ConstraintType = SDTCisSubVecOfVec;
-    x.SDTCisSubVecOfVec_Info.OtherOperandNum = R->getValueAsInt("OtherOpNum");
+    OtherOperandNo = R->getValueAsInt("OtherOpNum");
   } else if (R->isSubClassOf("SDTCVecEltisVT")) {
     ConstraintType = SDTCVecEltisVT;
     VVT = getValueTypeByHwMode(R->getValueAsDef("VT"), CGH);
@@ -1566,12 +1564,10 @@ SDTypeConstraint::SDTypeConstraint(const Record *R, const CodeGenHwModes &CGH) {
     }
   } else if (R->isSubClassOf("SDTCisSameNumEltsAs")) {
     ConstraintType = SDTCisSameNumEltsAs;
-    x.SDTCisSameNumEltsAs_Info.OtherOperandNum =
-        R->getValueAsInt("OtherOperandNum");
+    OtherOperandNo = R->getValueAsInt("OtherOperandNum");
   } else if (R->isSubClassOf("SDTCisSameSizeAs")) {
     ConstraintType = SDTCisSameSizeAs;
-    x.SDTCisSameSizeAs_Info.OtherOperandNum =
-        R->getValueAsInt("OtherOperandNum");
+    OtherOperandNo = R->getValueAsInt("OtherOperandNum");
   } else {
     PrintFatalError(R->getLoc(),
                     "Unrecognized SDTypeConstraint '" + R->getName() + "'!\n");
@@ -1632,7 +1628,7 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode &N,
   case SDTCisSameAs: {
     unsigned OResNo = 0;
     TreePatternNode &OtherNode =
-        getOperandNum(x.SDTCisSameAs_Info.OtherOperandNum, N, NodeInfo, OResNo);
+        getOperandNum(OtherOperandNo, N, NodeInfo, OResNo);
     return (int)NodeToApply.UpdateNodeType(ResNo, OtherNode.getExtType(OResNo),
                                            TP) |
            (int)OtherNode.UpdateNodeType(OResNo, NodeToApply.getExtType(ResNo),
@@ -1654,23 +1650,23 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode &N,
     TypeSetByHwMode TypeListTmp(VVT);
 
     unsigned OResNo = 0;
-    TreePatternNode &OtherNode = getOperandNum(
-        x.SDTCisVTSmallerThanOp_Info.OtherOperandNum, N, NodeInfo, OResNo);
+    TreePatternNode &OtherNode =
+        getOperandNum(OtherOperandNo, N, NodeInfo, OResNo);
 
     return TI.EnforceSmallerThan(TypeListTmp, OtherNode.getExtType(OResNo),
                                  /*SmallIsVT*/ true);
   }
   case SDTCisOpSmallerThanOp: {
     unsigned BResNo = 0;
-    TreePatternNode &BigOperand = getOperandNum(
-        x.SDTCisOpSmallerThanOp_Info.BigOperandNum, N, NodeInfo, BResNo);
+    TreePatternNode &BigOperand =
+        getOperandNum(OtherOperandNo, N, NodeInfo, BResNo);
     return TI.EnforceSmallerThan(NodeToApply.getExtType(ResNo),
                                  BigOperand.getExtType(BResNo));
   }
   case SDTCisEltOfVec: {
     unsigned VResNo = 0;
-    TreePatternNode &VecOperand = getOperandNum(
-        x.SDTCisEltOfVec_Info.OtherOperandNum, N, NodeInfo, VResNo);
+    TreePatternNode &VecOperand =
+        getOperandNum(OtherOperandNo, N, NodeInfo, VResNo);
     // Filter vector types out of VecOperand that don't have the right element
     // type.
     return TI.EnforceVectorEltTypeIs(VecOperand.getExtType(VResNo),
@@ -1678,8 +1674,8 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode &N,
   }
   case SDTCisSubVecOfVec: {
     unsigned VResNo = 0;
-    TreePatternNode &BigVecOperand = getOperandNum(
-        x.SDTCisSubVecOfVec_Info.OtherOperandNum, N, NodeInfo, VResNo);
+    TreePatternNode &BigVecOperand =
+        getOperandNum(OtherOperandNo, N, NodeInfo, VResNo);
 
     // Filter vector types out of BigVecOperand that don't have the
     // right subvector type.
@@ -1691,15 +1687,15 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode &N,
   }
   case SDTCisSameNumEltsAs: {
     unsigned OResNo = 0;
-    TreePatternNode &OtherNode = getOperandNum(
-        x.SDTCisSameNumEltsAs_Info.OtherOperandNum, N, NodeInfo, OResNo);
+    TreePatternNode &OtherNode =
+        getOperandNum(OtherOperandNo, N, NodeInfo, OResNo);
     return TI.EnforceSameNumElts(OtherNode.getExtType(OResNo),
                                  NodeToApply.getExtType(ResNo));
   }
   case SDTCisSameSizeAs: {
     unsigned OResNo = 0;
-    TreePatternNode &OtherNode = getOperandNum(
-        x.SDTCisSameSizeAs_Info.OtherOperandNum, N, NodeInfo, OResNo);
+    TreePatternNode &OtherNode =
+        getOperandNum(OtherOperandNo, N, NodeInfo, OResNo);
     return TI.EnforceSameSize(OtherNode.getExtType(OResNo),
                               NodeToApply.getExtType(ResNo));
   }
@@ -1707,6 +1703,58 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode &N,
   llvm_unreachable("Invalid ConstraintType!");
 }
 
+bool llvm::operator==(const SDTypeConstraint &LHS,
+                      const SDTypeConstraint &RHS) {
+  if (LHS.OperandNo != RHS.OperandNo ||
+      LHS.ConstraintType != RHS.ConstraintType)
+    return false; // Different operand or different kind of constraint.
+  switch (LHS.ConstraintType) {
+  case SDTypeConstraint::SDTCisSameAs:
+  case SDTypeConstraint::SDTCisVTSmallerThanOp:
+  case SDTypeConstraint::SDTCisOpSmallerThanOp:
+  case SDTypeConstraint::SDTCisEltOfVec:
+  case SDTypeConstraint::SDTCisSubVecOfVec:
+  case SDTypeConstraint::SDTCisSameNumEltsAs:
+  case SDTypeConstraint::SDTCisSameSizeAs:
+    return LHS.OtherOperandNo == RHS.OtherOperandNo;
+  case SDTypeConstraint::SDTCisVT:
+  case SDTypeConstraint::SDTCVecEltisVT:
+    return LHS.VVT == RHS.VVT;
+  case SDTypeConstraint::SDTCisPtrTy:
+  case SDTypeConstraint::SDTCisInt:
+  case SDTypeConstraint::SDTCisFP:
+  case SDTypeConstraint::SDTCisVec:
+    return true; // These kinds carry no payload.
+  }
+  return true;
+}
+
+bool llvm::operator<(const SDTypeConstraint &LHS, const SDTypeConstraint &RHS) {
+  auto LKey = std::tie(LHS.OperandNo, LHS.ConstraintType);
+  auto RKey = std::tie(RHS.OperandNo, RHS.ConstraintType);
+  if (LKey != RKey)
+    return LKey < RKey; // Order by operand number first, then by kind.
+  switch (LHS.ConstraintType) {
+  case SDTypeConstraint::SDTCisSameAs:
+  case SDTypeConstraint::SDTCisVTSmallerThanOp:
+  case SDTypeConstraint::SDTCisOpSmallerThanOp:
+  case SDTypeConstraint::SDTCisEltOfVec:
+  case SDTypeConstraint::SDTCisSubVecOfVec:
+  case SDTypeConstraint::SDTCisSameNumEltsAs:
+  case SDTypeConstraint::SDTCisSameSizeAs:
+    return LHS.OtherOperandNo < RHS.OtherOperandNo;
+  case SDTypeConstraint::SDTCisVT:
+  case SDTypeConstraint::SDTCVecEltisVT:
+    return LHS.VVT < RHS.VVT;
+  case SDTypeConstraint::SDTCisPtrTy:
+  case SDTypeConstraint::SDTCisInt:
+  case SDTypeConstraint::SDTCisFP:
+  case SDTypeConstraint::SDTCisVec:
+    return false; // These kinds carry no payload, so the two are equal.
+  }
+  return false;
+}
+
 // Update the node type to match an instruction operand or result as specified
 // in the ins or outs lists on the instruction definition. Return true if the
 // type was actually changed.
@@ -1797,6 +1845,14 @@ SDNodeInfo::SDNodeInfo(const Record *R, const CodeGenHwModes &CGH) : Def(R) {
 
   // Parse the properties.
   Properties = parseSDPatternOperatorProperties(R);
+  IsStrictFP = R->getValueAsBit("IsStrictFP");
+
+  std::optional<int64_t> MaybeTSFlags =
+      R->getValueAsBitsInit("TSFlags")->convertInitializerToInt();
+  if (!MaybeTSFlags)
+    PrintFatalError(R->getLoc(), "Invalid TSFlags");
+  assert(isUInt<32>(*MaybeTSFlags) && "TSFlags bit width out of sync");
+  TSFlags = *MaybeTSFlags;
 
   // Parse the type constraints.
   for (const Record *R : TypeProfile->getValueAsListOfDefs("Constraints"))
diff --git a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.h b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.h
index f8c3917293825..6a6f1a6ac437c 100644
--- a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.h
+++ b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.h
@@ -354,10 +354,11 @@ typedef StringSet<> MultipleUseVarSet;
 /// SDTypeConstraint - This is a discriminated union of constraints,
 /// corresponding to the SDTypeConstraint tablegen class in Target.td.
 struct SDTypeConstraint {
+  SDTypeConstraint() = default;
   SDTypeConstraint(const Record *R, const CodeGenHwModes &CGH);
 
   unsigned OperandNo; // The operand # this constraint applies to.
-  enum {
+  enum KindTy {
     SDTCisVT,
     SDTCisPtrTy,
     SDTCisInt,
@@ -373,29 +374,7 @@ struct SDTypeConstraint {
     SDTCisSameSizeAs
   } ConstraintType;
 
-  union { // The discriminated union.
-    struct {
-      unsigned OtherOperandNum;
-    } SDTCisSameAs_Info;
-    struct {
-      unsigned OtherOperandNum;
-    } SDTCisVTSmallerThanOp_Info;
-    struct {
-      unsigned BigOperandNum;
-    } SDTCisOpSmallerThanOp_Info;
-    struct {
-      unsigned OtherOperandNum;
-    } SDTCisEltOfVec_Info;
-    struct {
-      unsigned OtherOperandNum;
-    } SDTCisSubVecOfVec_Info;
-    struct {
-      unsigned OtherOperandNum;
-    } SDTCisSameNumEltsAs_Info;
-    struct {
-      unsigned OtherOperandNum;
-    } SDTCisSameSizeAs_Info;
-  } x;
+  unsigned OtherOperandNo;
 
   // The VT for SDTCisVT and SDTCVecEltisVT.
   // Must not be in the union because it has a non-trivial destructor.
@@ -407,6 +386,11 @@ struct SDTypeConstraint {
   /// is flagged.
   bool ApplyTypeConstraint(TreePatternNode &N, const SDNodeInfo &NodeInfo,
                            TreePattern &TP) const;
+
+  friend bool operator==(const SDTypeConstraint &LHS,
+                         const SDTypeConstraint &RHS);
+  friend bool operator<(const SDTypeConstraint &LHS,
+                        const SDTypeConstraint &RHS);
 };
 
 /// ScopedName - A name of a node associated with a "scope" that indicates
@@ -438,9 +422,11 @@ class SDNodeInfo {
   const Record *Def;
   StringRef EnumName;
   StringRef SDClassName;
-  unsigned Properties;
   unsigned NumResults;
   int NumOperands;
+  unsigned Properties;
+  bool IsStrictFP;
+  uint32_t TSFlags;
   std::vector<SDTypeConstraint> TypeConstraints;
 
 public:
@@ -465,10 +451,16 @@ class SDNodeInfo {
   /// MVT::SimpleValueType.  Otherwise, return MVT::Other.
   MVT::SimpleValueType getKnownType(unsigned ResNo) const;
 
+  unsigned getProperties() const { return Properties; }
+
   /// hasProperty - Return true if this node has the specified property.
   ///
   bool hasProperty(enum SDNP Prop) const { return Properties & (1 << Prop); }
 
+  bool isStrictFP() const { return IsStrictFP; }
+
+  uint32_t getTSFlags() const { return TSFlags; }
+
   /// ApplyTypeConstraints - Given a node in a pattern, apply the type
   /// constraints for this node to the operands of the node.  This returns
   /// true if it makes a change, false otherwise.  If a type contradiction is
diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp
index 7488c8de57885..70f2aa6522640 100644
--- a/llvm/utils/TableGen/DXILEmitter.cpp
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -56,6 +56,7 @@ struct DXILOperationDesc {
   SmallVector<const Record *> OverloadRecs;
   SmallVector<const Record *> StageRecs;
   SmallVector<const Record *> AttrRecs;
+  SmallVector<const Record *> PropRecs;
   SmallVector<DXILIntrinsicSelect> IntrinsicSelects;
   SmallVector<StringRef, 4>
       ShaderStages; // shader stages to which this applies, empty for all.
@@ -177,6 +178,12 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) {
     AttrRecs.push_back(CR);
   }
 
+  Recs = R->getValueAsListOfDefs("properties");
+
+  // Get property records
+  for (const Record *CR : Recs)
+    PropRecs.push_back(CR);
+
   // Get the operation class
   OpClass = R->getValueAsDef("OpClass")->getName();
 
@@ -313,45 +320,18 @@ static std::string getStageMaskString(ArrayRef<const Record *> Recs) {
   return MaskString;
 }
 
-/// Return a string representation of valid attribute information denoted
-// by input records
-//
-/// \param Recs A vector of records of TableGen Attribute records
-/// \return std::string string representation of stages mask string
-///         predicated by DXIL Version. E.g.,
-//          {{{1, 0}, Mask1}, {{1, 2}, Mask2}, ...}
-static std::string getAttributeMaskString(ArrayRef<const Record *> Recs) {
-  std::string MaskString = "";
-  std::string Prefix = "";
-  MaskString.append("{");
-
-  for (const auto *Rec : Recs) {
-    unsigned Major = Rec->getValueAsDef("dxil_version")->getValueAsInt("Major");
-    unsigned Minor = Rec->getValueAsDef("dxil_version")->getValueAsInt("Minor");
-    MaskString.append(Prefix)
-        .append("{{")
-        .append(std::to_string(Major))
-        .append(", ")
-        .append(std::to_string(Minor).append("}, "));
-
-    std::string PipePrefix = "";
-    auto Attrs = Rec->getValueAsListOfDefs("op_attrs");
-    if (Attrs.empty()) {
-      MaskString.append("Attribute::None");
-    } else {
-      for (const auto *Attr : Attrs) {
-        MaskString.append(PipePrefix)
-            .append("Attribute::")
-            .append(Attr->getName());
-        PipePrefix = " | ";
-      }
-    }
-
-    MaskString.append("}");
-    Prefix = ", ";
+/// Emit a list of valid DXIL Version records
+static void emitDXILVersions(const RecordKeeper &Records, raw_ostream &OS) {
+  OS << "#ifdef DXIL_VERSION\n";
+  for (const Record *Version : Records.getAllDerivedDefinitions("Version")) {
+    unsigned Major = Version->getValueAsInt("Major");
+    unsigned Minor = Version->getValueAsInt("Minor");
+    OS << "DXIL_VERSION(";
+    OS << std::to_string(Major) << ", " << std::to_string(Minor);
+    OS << ")\n";
   }
-  MaskString.append("}");
-  return MaskString;
+  OS << "#undef DXIL_VERSION\n";
+  OS << "#endif\n\n";
 }
 
 /// Emit a mapping of DXIL opcode to opname
@@ -383,6 +363,73 @@ static void emitDXILOpParamTypes(const RecordKeeper &Records, raw_ostream &OS) {
   OS << "#endif\n\n";
 }
 
+/// Emit one DXIL_ATTRIBUTE(<name>) entry for every record derived from
+/// DXILAttribute, inside an #ifdef DXIL_ATTRIBUTE guard.
+static void emitDXILAttributes(const RecordKeeper &Records, raw_ostream &OS) {
+  OS << "#ifdef DXIL_ATTRIBUTE\n";
+  for (const Record *R : Records.getAllDerivedDefinitions("DXILAttribute"))
+    OS << "DXIL_ATTRIBUTE(" << R->getName() << ")\n";
+  OS << "#undef DXIL_ATTRIBUTE\n" << "#endif\n\n";
+}
+
+// Return true if Attrs contains a record with the same name as Attr. Attrs is
+// an ArrayRef so that a caller's vector is viewed instead of being copied.
+static bool attrIsDefined(ArrayRef<const Record *> Attrs,
+                          const Record *Attr) {
+  for (const Record *CurAttr : Attrs)
+    if (CurAttr->getName() == Attr->getName())
+      return true;
+  return false;
+}
+
+/// Emit a table of bools denoting a DXIL op's function attributes
+///
+/// One DXIL_OP_ATTRIBUTES(...) row is written per (op, DXIL version) entry
+/// found in the op's AttrRecs.
+static void emitDXILOpAttributes(const RecordKeeper &Records,
+                                 ArrayRef<DXILOperationDesc> Ops,
+                                 raw_ostream &OS) {
+  // A DXIL op can have multiple function attributes that are specific to a
+  // specific DXIL version and higher. AttrRecs models this by grouping the
+  // attributes by the versions. So we will output a macro for each version
+  // number with a table of bools in the following format:
+  //
+  //     OpName, VersionMajor, VersionMinor, FnAttr1, FnAttr2, ...
+  // Eg)    Abs,            1,            0,    true,   false, ...
+  OS << "#ifdef DXIL_OP_ATTRIBUTES\n";
+  // The full attribute list is the same for every row, so look it up once.
+  const auto &AllAttrs = Records.getAllDerivedDefinitions("DXILAttribute");
+  for (const auto &Op : Ops) {
+    for (const auto *Rec : Op.AttrRecs) {
+      const auto *Version = Rec->getValueAsDef("dxil_version");
+      unsigned Major = Version->getValueAsInt("Major");
+      unsigned Minor = Version->getValueAsInt("Minor");
+      OS << "DXIL_OP_ATTRIBUTES(dxil::OpCode::" << Op.OpName << ", ";
+      // raw_ostream prints integers directly; no std::to_string is needed.
+      OS << Major << ", " << Minor;
+      // These Attrs are the ones set for above DXIL version
+      auto Attrs = Rec->getValueAsListOfDefs("fn_attrs");
+      // We will then iterate through all possible attributes and mark the
+      // present ones as 'true' and all the others as 'false' to create the
+      // boolean table, eg) true, false, false, false
+      for (const Record *Attr : AllAttrs)
+        OS << (attrIsDefined(Attrs, Attr) ? ", true" : ", false");
+      OS << ")\n";
+    }
+  }
+  OS << "#undef DXIL_OP_ATTRIBUTES\n";
+  OS << "#endif\n\n";
+}
+
+/// Emit one DXIL_PROPERTY(<name>) entry for every record derived from
+/// DXILProperty, inside an #ifdef DXIL_PROPERTY guard.
+static void emitDXILProperties(const RecordKeeper &Records, raw_ostream &OS) {
+  OS << "#ifdef DXIL_PROPERTY\n";
+  for (const Record *R : Records.getAllDerivedDefinitions("DXILProperty"))
+    OS << "DXIL_PROPERTY(" << R->getName() << ")\n";
+  OS << "#undef DXIL_PROPERTY\n" << "#endif\n\n";
+}
+
 /// Emit a list of DXIL op function types
 static void emitDXILOpFunctionTypes(ArrayRef<DXILOperationDesc> Ops,
                                     raw_ostream &OS) {
@@ -478,8 +525,7 @@ static void emitDXILOperationTable(ArrayRef<DXILOperationDesc> Ops,
        << OpStrings.get(Op.OpName) << ", OpCodeClass::" << Op.OpClass << ", "
        << OpClassStrings.get(Op.OpClass.data()) << ", "
        << getOverloadMaskString(Op.OverloadRecs) << ", "
-       << getStageMaskString(Op.StageRecs) << ", "
-       << getAttributeMaskString(Op.AttrRecs) << ", " << Op.OverloadParamIndex
+       << getStageMaskString(Op.StageRecs) << ", " << Op.OverloadParamIndex
        << " }";
     Prefix = ",\n";
   }
@@ -581,9 +627,13 @@ static void emitDxilOperation(const RecordKeeper &Records, raw_ostream &OS) {
     PrevOp = Desc.OpCode;
   }
 
+  emitDXILVersions(Records, OS);
   emitDXILOpCodes(DXILOps, OS);
   emitDXILOpClasses(Records, OS);
   emitDXILOpParamTypes(Records, OS);
+  emitDXILAttributes(Records, OS);
+  emitDXILOpAttributes(Records, DXILOps, OS);
+  emitDXILProperties(Records, OS);
   emitDXILOpFunctionTypes(DXILOps, OS);
   emitDXILIntrinsicArgSelectTypes(Records, OS);
   emitDXILIntrinsicMap(DXILOps, OS);
diff --git a/llvm/utils/TableGen/SDNodeInfoEmitter.cpp b/llvm/utils/TableGen/SDNodeInfoEmitter.cpp
new file mode 100644
index 0000000000000..cb971b089f5a4
--- /dev/null
+++ b/llvm/utils/TableGen/SDNodeInfoEmitter.cpp
@@ -0,0 +1,361 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Basic/SequenceToOffsetTable.h"
+#include "Common/CodeGenDAGPatterns.h" // For SDNodeInfo.
+#include "llvm/Support/CommandLine.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/StringToOffsetTable.h"
+#include "llvm/TableGen/TableGenBackend.h"
+
+using namespace llvm;
+
+static cl::OptionCategory SDNodeInfoEmitterCat("Options for -gen-sdnode-info");
+
+static cl::opt<std::string> TargetSDNodeNamespace(
+    "sdnode-namespace", cl::cat(SDNodeInfoEmitterCat),
+    cl::desc("Specify target SDNode namespace (default=<Target>ISD)"));
+
+static cl::opt<bool> WarnOnSkippedNodes(
+    "warn-on-skipped-nodes", cl::cat(SDNodeInfoEmitterCat),
+    cl::desc("Explain why a node was skipped (default=true)"), cl::init(true));
+
+namespace {
+
+class SDNodeInfoEmitter {
+  const RecordKeeper &RK;
+  const CodeGenTarget Target;
+  std::map<StringRef, SmallVector<SDNodeInfo, 2>> NodesByName;
+
+public:
+  explicit SDNodeInfoEmitter(const RecordKeeper &RK);
+
+  void run(raw_ostream &OS) const;
+
+private:
+  void emitEnum(raw_ostream &OS) const;
+  // Returns, for each enum name, the offset of its name in the emitted table.
+  std::vector<unsigned> emitNodeNames(raw_ostream &OS) const;
+  // Returns, for each enum name, its (offset, count) in the constraint table.
+  std::vector<std::pair<unsigned, unsigned>>
+  emitTypeConstraints(raw_ostream &OS) const;
+
+  void emitDescs(raw_ostream &OS) const;
+};
+
+} // namespace
+
+static bool haveCompatibleDescriptions(const SDNodeInfo &N1,
+                                       const SDNodeInfo &N2) {
+  // Only a subset of node properties matters here. Properties such as
+  // SDNPAssociative and SDNPCommutative place no constraints on nodes and
+  // occasionally differ between nodes that share an enum name.
+  constexpr unsigned RelevantProps =
+      (1 << SDNPHasChain) | (1 << SDNPOutGlue) | (1 << SDNPInGlue) |
+      (1 << SDNPOptInGlue) | (1 << SDNPMemOperand) | (1 << SDNPVariadic);
+
+  // Result and operand counts must agree.
+  bool SameShape = N1.getNumResults() == N2.getNumResults() &&
+                   N1.getNumOperands() == N2.getNumOperands();
+  // So must the IsStrictFP bit and TSFlags.
+  bool SameFlags =
+      N1.isStrictFP() == N2.isStrictFP() && N1.getTSFlags() == N2.getTSFlags();
+  // XOR leaves exactly the differing bits; none of them may be relevant.
+  return SameShape && SameFlags &&
+         ((N1.getProperties() ^ N2.getProperties()) & RelevantProps) == 0;
+}
+
+static bool haveCompatibleDescriptions(ArrayRef<SDNodeInfo> Nodes) {
+  const SDNodeInfo &First = Nodes.front();
+  for (const SDNodeInfo &Candidate : drop_begin(Nodes))
+    if (!haveCompatibleDescriptions(Candidate, First))
+      return false;
+  return true;
+}
+
+static void warnOnSkippedNode(const SDNodeInfo &N, const Twine &Reason) {
+  PrintWarning(N.getRecord()->getLoc(), Twine("skipped node: ") + Reason);
+}
+
+SDNodeInfoEmitter::SDNodeInfoEmitter(const RecordKeeper &RK)
+    : RK(RK), Target(RK) {
+  // Unless overridden on the command line, the target SDNode namespace is
+  // the target name followed by "ISD".
+  if (TargetSDNodeNamespace.getNumOccurrences() == 0)
+    TargetSDNodeNamespace = Target.getName().str() + "ISD";
+
+  // Group the SDNode records belonging to that namespace by enum name.
+  // Most names map to a single node, but occasionally several nodes share
+  // one name.
+  const CodeGenHwModes &HwModes = Target.getHwModes();
+  for (const Record *R : RK.getAllDerivedDefinitions("SDNode")) {
+    SDNodeInfo Node(R, HwModes);
+    auto [NS, EnumName] = Node.getEnumName().split("::");
+
+    // An enum name must have the form "<namespace>::<name>".
+    bool IsMalformed = NS.empty() || EnumName.empty();
+    if (IsMalformed && WarnOnSkippedNodes)
+      warnOnSkippedNode(Node, "invalid enum name");
+
+    // Nodes from other namespaces are dropped silently.
+    if (IsMalformed || NS != TargetSDNodeNamespace)
+      continue;
+
+    NodesByName[EnumName].push_back(std::move(Node));
+  }
+
+  // Drop every name whose nodes disagree on "prototype" and/or flags.
+  // Type constraints are deliberately not compared here; differing
+  // constraints are simply not emitted later on.
+  for (auto I = NodesByName.begin(); I != NodesByName.end();) {
+    if (haveCompatibleDescriptions(I->second)) {
+      ++I;
+      continue;
+    }
+
+    if (WarnOnSkippedNodes)
+      for (const SDNodeInfo &N : I->second)
+        warnOnSkippedNode(N, "incompatible description");
+
+    // map::erase hands back the iterator following the erased element.
+    I = NodesByName.erase(I);
+  }
+}
+
+void SDNodeInfoEmitter::emitEnum(raw_ostream &OS) const {
+  OS << "#ifdef GET_SDNODE_ENUM\n";
+  OS << "#undef GET_SDNODE_ENUM\n\n";
+  OS << "namespace llvm::" << TargetSDNodeNamespace << " {\n\n";
+
+  if (NodesByName.empty()) {
+    // No nodes: the generated opcode range is empty.
+    OS << "static constexpr unsigned GENERATED_OPCODE_END = "
+          "ISD::BUILTIN_OP_END;\n\n";
+  } else {
+    OS << "enum GenNodeType : unsigned {\n";
+
+    // Only the first enumerator gets an explicit value.
+    auto It = NodesByName.begin();
+    OS << "  " << It->first << " = ISD::BUILTIN_OP_END,\n";
+    for (++It; It != NodesByName.end(); ++It)
+      OS << "  " << It->first << ",\n";
+
+    OS << "};\n\n";
+    OS << "static constexpr unsigned GENERATED_OPCODE_END = "
+       << NodesByName.rbegin()->first << " + 1;\n\n";
+  }
+
+  OS << "} // namespace llvm::" << TargetSDNodeNamespace << "\n\n";
+  OS << "#endif // GET_SDNODE_ENUM\n\n";
+}
+
+std::vector<unsigned> SDNodeInfoEmitter::emitNodeNames(raw_ostream &OS) const {
+  StringToOffsetTable Names;
+  std::vector<unsigned> Offsets;
+  Offsets.reserve(NodesByName.size());
+
+  // Intern "<namespace>::<enum name>" for each entry, recording its offset
+  // in map iteration order.
+  for (const auto &Entry : NodesByName) {
+    SmallString<64> Qualified;
+    raw_svector_ostream QualifiedOS(Qualified);
+    QualifiedOS << TargetSDNodeNamespace << "::" << Entry.first;
+    Offsets.push_back(Names.GetOrAddStringOffset(Qualified));
+  }
+
+  Names.EmitStringLiteralDef(
+      OS, "static const char " + Target.getName() + "SDNodeNames[]",
+      /*Indent=*/"");
+  OS << '\n';
+  return Offsets;
+}
+
+static StringRef getTypeConstraintKindName(SDTypeConstraint::KindTy Kind) {
+  // Each enumerator's spelling doubles as its printed name.
+#define KIND_NAME(ENUMERATOR)                                                  \
+  case SDTypeConstraint::ENUMERATOR:                                           \
+    return #ENUMERATOR
+  switch (Kind) {
+    KIND_NAME(SDTCisVT);
+    KIND_NAME(SDTCisPtrTy);
+    KIND_NAME(SDTCisInt);
+    KIND_NAME(SDTCisFP);
+    KIND_NAME(SDTCisVec);
+    KIND_NAME(SDTCisSameAs);
+    KIND_NAME(SDTCisVTSmallerThanOp);
+    KIND_NAME(SDTCisOpSmallerThanOp);
+    KIND_NAME(SDTCisEltOfVec);
+    KIND_NAME(SDTCisSubVecOfVec);
+    KIND_NAME(SDTCVecEltisVT);
+    KIND_NAME(SDTCisSameNumEltsAs);
+    KIND_NAME(SDTCisSameSizeAs);
+  }
+#undef KIND_NAME
+  llvm_unreachable("Unknown constraint kind"); // Make MSVC happy.
+}
+
+static void emitTypeConstraint(raw_ostream &OS, SDTypeConstraint C) {
+  // Fields that the constraint kind does not use keep these defaults.
+  unsigned OtherOpNo = 0;
+  StringRef VTName = "MVT::INVALID_SIMPLE_VALUE_TYPE";
+  switch (C.ConstraintType) {
+  case SDTypeConstraint::SDTCisSameAs:
+  case SDTypeConstraint::SDTCisVTSmallerThanOp:
+  case SDTypeConstraint::SDTCisOpSmallerThanOp:
+  case SDTypeConstraint::SDTCisEltOfVec:
+  case SDTypeConstraint::SDTCisSubVecOfVec:
+  case SDTypeConstraint::SDTCisSameNumEltsAs:
+  case SDTypeConstraint::SDTCisSameSizeAs:
+    OtherOpNo = C.OtherOperandNo;
+    break;
+  case SDTypeConstraint::SDTCisVT:
+  case SDTypeConstraint::SDTCVecEltisVT:
+    if (C.VVT.isSimple()) {
+      MVT VT = C.VVT.getSimple();
+      if (VT.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE)
+        VTName = getEnumName(VT.SimpleTy);
+    }
+    break;
+  case SDTypeConstraint::SDTCisPtrTy:
+  case SDTypeConstraint::SDTCisInt:
+  case SDTypeConstraint::SDTCisFP:
+  case SDTypeConstraint::SDTCisVec:
+    break;
+  }
+
+  StringRef KindName = getTypeConstraintKindName(C.ConstraintType);
+  OS << formatv("{{{}, {}, {}, {}}", KindName, C.OperandNo, OtherOpNo, VTName);
+}
+
+// Emit the <Target>SDTypeConstraints table. The result holds one
+// (offset, count) pair per enum name, in NodesByName iteration order.
+std::vector<std::pair<unsigned, unsigned>>
+SDNodeInfoEmitter::emitTypeConstraints(raw_ostream &OS) const {
+  using ConstraintsVecTy = SmallVector<SDTypeConstraint, 0>;
+
+  // SequenceToOffsetTable reuses the storage if a sequence matches another
+  // sequence's *suffix*. It is more likely that we have a matching *prefix*,
+  // so sequences are stored reversed to increase the likelihood of a match.
+  auto MakeKey = [](ArrayRef<SDTypeConstraint> Constraints) {
+    return ConstraintsVecTy(reverse(Constraints));
+  };
+
+  // Pass 1: decide which constraint list (possibly none) each enum name
+  // contributes. If nodes with the same enum name have different
+  // constraints, they are treated as if they had no constraints at all.
+  SmallVector<ArrayRef<SDTypeConstraint>> EmittedConstraints;
+  EmittedConstraints.reserve(NodesByName.size());
+  for (const auto &Entry : NodesByName) {
+    ArrayRef<SDNodeInfo> Nodes = Entry.second;
+    ArrayRef<SDTypeConstraint> Constraints = Nodes.front().getTypeConstraints();
+    for (const SDNodeInfo &Other : drop_begin(Nodes)) {
+      if (ArrayRef(Other.getTypeConstraints()) != Constraints) {
+        Constraints = ArrayRef<SDTypeConstraint>();
+        break;
+      }
+    }
+    EmittedConstraints.push_back(Constraints);
+  }
+
+  // Don't add empty sequences to the table. This slightly simplifies
+  // the implementation and makes the output less confusing if the table
+  // ends up empty.
+  SequenceToOffsetTable<ConstraintsVecTy> ConstraintTable(
+      /*Terminator=*/std::nullopt);
+  for (ArrayRef<SDTypeConstraint> Constraints : EmittedConstraints)
+    if (!Constraints.empty())
+      ConstraintTable.add(MakeKey(Constraints));
+
+  ConstraintTable.layout();
+
+  OS << "static const SDTypeConstraint " << Target.getName()
+     << "SDTypeConstraints[] = {\n";
+  ConstraintTable.emit(OS, emitTypeConstraint);
+  OS << "};\n\n";
+
+  // Pass 2: now that the layout is fixed, look up where each list landed.
+  // Names without emitted constraints get a zero offset and a zero count.
+  std::vector<std::pair<unsigned, unsigned>> ConstraintOffsetsAndCounts;
+  ConstraintOffsetsAndCounts.reserve(NodesByName.size());
+  for (ArrayRef<SDTypeConstraint> Constraints : EmittedConstraints) {
+    unsigned Offset = 0;
+    if (!Constraints.empty())
+      Offset = ConstraintTable.get(MakeKey(Constraints));
+    ConstraintOffsetsAndCounts.emplace_back(Offset, Constraints.size());
+  }
+
+  return ConstraintOffsetsAndCounts;
+}
+
+static void emitDesc(raw_ostream &OS, StringRef EnumName,
+                     ArrayRef<SDNodeInfo> Nodes, unsigned NameOffset,
+                     unsigned ConstraintsOffset, unsigned ConstraintCount) {
+  assert(haveCompatibleDescriptions(Nodes));
+  const SDNodeInfo &N = Nodes.front(); // All nodes are compatible (asserted).
+  OS << "    {" << N.getNumResults() << ", " << N.getNumOperands() << ", 0";
+  // The "0" just written starts an OR-expression extended by the flags below.
+  // Emitted properties must be kept in sync with haveCompatibleDescriptions.
+  unsigned Properties = N.getProperties();
+  if (Properties & (1 << SDNPHasChain))
+    OS << "|1<<SDNPHasChain";
+  if (Properties & (1 << SDNPOutGlue))
+    OS << "|1<<SDNPOutGlue";
+  if (Properties & (1 << SDNPInGlue))
+    OS << "|1<<SDNPInGlue";
+  if (Properties & (1 << SDNPOptInGlue))
+    OS << "|1<<SDNPOptInGlue";
+  if (Properties & (1 << SDNPVariadic))
+    OS << "|1<<SDNPVariadic";
+  if (Properties & (1 << SDNPMemOperand))
+    OS << "|1<<SDNPMemOperand";
+  // Same pattern for the next field, which only ever gains SDNFIsStrictFP.
+  OS << ", 0";
+  if (N.isStrictFP())
+    OS << "|1<<SDNFIsStrictFP";
+  // Remaining fields, then the enum name as a trailing comment on the row.
+  OS << formatv(", {}, {}, {}, {}}, // {}\n", N.getTSFlags(), NameOffset,
+                ConstraintsOffset, ConstraintCount, EnumName);
+}
+
+void SDNodeInfoEmitter::emitDescs(raw_ostream &OS) const {
+  StringRef TargetName = Target.getName();
+  // Everything below is emitted inside #ifdef GET_SDNODE_DESC ... #endif.
+  OS << "#ifdef GET_SDNODE_DESC\n";
+  OS << "#undef GET_SDNODE_DESC\n\n";
+  OS << "namespace llvm {\n";
+  // Emit the two helper tables first and keep the offsets they hand back.
+  std::vector<unsigned> NameOffsets = emitNodeNames(OS);
+  std::vector<std::pair<unsigned, unsigned>> ConstraintOffsetsAndCounts =
+      emitTypeConstraints(OS);
+
+  OS << "static const SDNodeDesc " << TargetName << "SDNodeDescs[] = {\n";
+  // One row per NodesByName entry, combined with that entry's offsets.
+  for (const auto &[NameAndNodes, NameOffset, ConstraintOffsetAndCount] :
+       zip_equal(NodesByName, NameOffsets, ConstraintOffsetsAndCounts))
+    emitDesc(OS, NameAndNodes.first, NameAndNodes.second, NameOffset,
+             ConstraintOffsetAndCount.first, ConstraintOffsetAndCount.second);
+
+  OS << "};\n\n";
+  // Define <Target>GenSDNodeInfo, which references the three arrays above.
+  OS << formatv("static const SDNodeInfo {0}GenSDNodeInfo(\n"
+                "    /*NumOpcodes=*/{1}, {0}SDNodeDescs,\n"
+                "    {0}SDNodeNames, {0}SDTypeConstraints);\n\n",
+                TargetName, NodesByName.size());
+
+  OS << "} // namespace llvm\n\n";
+  OS << "#endif // GET_SDNODE_DESC\n\n";
+}
+
+void SDNodeInfoEmitter::run(raw_ostream &OS) const {
+  emitSourceFileHeader("Target SDNode descriptions", OS, RK);
+  emitEnum(OS);
+  emitDescs(OS); // The GET_SDNODE_DESC-guarded tables.
+}
+// NOTE(review): presumably registers the emitter as -gen-sd-node-info; confirm.
+static TableGen::Emitter::OptClass<SDNodeInfoEmitter>
+    X("gen-sd-node-info", "Generate target SDNode descriptions");
diff --git a/llvm/utils/docker/build_docker_image.sh b/llvm/utils/docker/build_docker_image.sh
index f2d43934f4cca..68de54709c295 100755
--- a/llvm/utils/docker/build_docker_image.sh
+++ b/llvm/utils/docker/build_docker_image.sh
@@ -23,7 +23,7 @@ Available options:
   General:
     -h|--help               show this help message
   Docker-specific:
-    -s|--source             image source dir (i.e. debian10, nvidia-cuda, etc)
+    -s|--source             image source dir (i.e. debian12, nvidia-cuda, etc)
     -d|--docker-repository  docker repository for the image
     -t|--docker-tag         docker tag for the image
   Checkout arguments:
@@ -38,7 +38,7 @@ Available options:
     -p|--llvm-project   Add the project to a list LLVM_ENABLE_PROJECTS, passed to
                         CMake.
                         Can be specified multiple times.
-    -c|--checksums      name of a file, containing checksums of llvm checkout.
+    --checksums         name of a file, containing checksums of llvm checkout.
                         Script will fail if checksums of the checkout do not
                         match.
   Build-specific:
@@ -51,18 +51,18 @@ Required options: --source and --docker-repository, at least one
 All options after '--' are passed to CMake invocation.
 
 For example, running:
-$ build_docker_image.sh -s debian10 -d mydocker/debian10-clang -t latest \
+$ build_docker_image.sh -s debian12 -d mydocker/debian12-clang -t latest \
   -p clang -i install-clang -i install-clang-resource-headers
 will produce two docker images:
-    mydocker/debian10-clang-build:latest - an intermediate image used to compile
+    mydocker/debian12-clang-build:latest - an intermediate image used to compile
       clang.
-    mydocker/clang-debian10:latest       - a small image with preinstalled clang.
+    mydocker/clang-debian12:latest       - a small image with preinstalled clang.
 Please note that this example produces a not very useful installation, since it
 doesn't override CMake defaults, which produces a Debug and non-boostrapped
 version of clang.
 
 To get a 2-stage clang build, you could use this command:
-$ ./build_docker_image.sh -s debian10 -d mydocker/clang-debian10 -t "latest" \
+$ ./build_docker_image.sh -s debian12 -d mydocker/clang-debian12 -t "latest" \
     -p clang -i stage2-install-clang -i stage2-install-clang-resource-headers \ 
     -- \ 
     -DLLVM_TARGETS_TO_BUILD=Native -DCMAKE_BUILD_TYPE=Release \ 
@@ -110,7 +110,7 @@ while [[ $# -gt 0 ]]; do
       CMAKE_ENABLED_PROJECTS="$CMAKE_ENABLED_PROJECTS;$PROJ"
       shift 2
       ;;
-    -c|--checksums)
+    --checksums)
       shift
       CHECKSUMS_FILE="$1"
       shift
@@ -160,7 +160,7 @@ if [ $SEEN_INSTALL_TARGET -eq 0 ]; then
   exit 1
 fi
 
-SOURCE_DIR=$(dirname $0)
+SOURCE_DIR=$(dirname "$0")
 if [ ! -d "$SOURCE_DIR/$IMAGE_SOURCE" ]; then
   echo "No sources for '$IMAGE_SOURCE' were found in $SOURCE_DIR"
   exit 1
diff --git a/llvm/utils/docker/debian10/Dockerfile b/llvm/utils/docker/debian12/Dockerfile
similarity index 79%
rename from llvm/utils/docker/debian10/Dockerfile
rename to llvm/utils/docker/debian12/Dockerfile
index b898c935b9726..84123c439aced 100644
--- a/llvm/utils/docker/debian10/Dockerfile
+++ b/llvm/utils/docker/debian12/Dockerfile
@@ -1,4 +1,4 @@
-#===- llvm/utils/docker/debian10/build/Dockerfile -------------------------===//
+#===- llvm/utils/docker/debian12/Dockerfile -------------------------------===//
 #
 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 # See https://llvm.org/LICENSE.txt for license information.
@@ -6,8 +6,8 @@
 #
 #===----------------------------------------------------------------------===//
 # Stage 1. Check out LLVM source code and run the build.
-FROM launcher.gcr.io/google/debian10:latest as builder
-LABEL maintainer "LLVM Developers"
+FROM launcher.gcr.io/google/debian12:latest AS builder
+LABEL maintainer="LLVM Developers"
 # Install build dependencies of llvm.
 # First, Update the apt's source list and include the sources of the packages.
 RUN grep deb /etc/apt/sources.list | \
@@ -19,8 +19,8 @@ RUN apt-get update && \
     rm -rf /var/lib/apt/lists/*
 # Install a newer ninja release. It seems the older version in the debian repos
 # randomly crashes when compiling llvm.
-RUN wget "https://github.com/ninja-build/ninja/releases/download/v1.8.2/ninja-linux.zip" && \
-    echo "d2fea9ff33b3ef353161ed906f260d565ca55b8ca0568fa07b1d2cab90a84a07 ninja-linux.zip" \
+RUN wget "https://github.com/ninja-build/ninja/releases/download/v1.12.1/ninja-linux.zip" && \
+    echo "6f98805688d19672bd699fbbfa2c2cf0fc054ac3df1f0e6a47664d963d530255 ninja-linux.zip" \
         | sha256sum -c  && \
     unzip ninja-linux.zip -d /usr/local/bin && \
     rm ninja-linux.zip
@@ -37,11 +37,11 @@ RUN /tmp/scripts/build_install_llvm.sh --to /tmp/clang-install ${buildscript_arg
 
 
 # Stage 2. Produce a minimal release image with build results.
-FROM launcher.gcr.io/google/debian10:latest
-LABEL maintainer "LLVM Developers"
+FROM launcher.gcr.io/google/debian12:latest
+LABEL maintainer="LLVM Developers"
 # Install packages for minimal useful image.
 RUN apt-get update && \
-    apt-get install -y --no-install-recommends libstdc++-7-dev binutils && \
+    apt-get install -y --no-install-recommends binutils && \
     rm -rf /var/lib/apt/lists/*
 # Copy build results of stage 1 to /usr/local.
 COPY --from=builder /tmp/clang-install/ /usr/local/
diff --git a/llvm/utils/docker/example/Dockerfile b/llvm/utils/docker/example/Dockerfile
index ebfb0e49c82c8..197716fa7bdff 100644
--- a/llvm/utils/docker/example/Dockerfile
+++ b/llvm/utils/docker/example/Dockerfile
@@ -10,9 +10,9 @@
 
 # Stage 1. Check out LLVM source code and run the build.
 # FIXME: Replace 'ubuntu' with your base image
-FROM ubuntu as builder
+FROM ubuntu AS builder
 # FIXME: Change maintainer name
-LABEL maintainer "Maintainer <maintainer@email>"
+LABEL maintainer="Maintainer <maintainer@email>"
 # FIXME: Install llvm/clang build dependencies here. Including compiler to
 # build stage1, cmake, subversion, ninja, etc.
 
@@ -31,7 +31,7 @@ RUN /tmp/scripts/build_install_llvm.sh --to /tmp/clang-install ${buildscript_arg
 # FIXME: Replace 'ubuntu' with your base image.
 FROM ubuntu
 # FIXME: Change maintainer name.
-LABEL maintainer "Maintainer <maintainer@email>"
+LABEL maintainer="Maintainer <maintainer@email>"
 # FIXME: Install all packages you want to have in your release container.
 # A minimal useful installation should include at least libstdc++ and binutils.
 
diff --git a/llvm/utils/docker/nvidia-cuda/Dockerfile b/llvm/utils/docker/nvidia-cuda/Dockerfile
index 91ad53c57b165..035c5825df0fd 100644
--- a/llvm/utils/docker/nvidia-cuda/Dockerfile
+++ b/llvm/utils/docker/nvidia-cuda/Dockerfile
@@ -6,11 +6,11 @@
 #
 #===----------------------------------------------------------------------===//
 # Stage 1. Check out LLVM source code and run the build.
-FROM nvidia/cuda:8.0-devel as builder
-LABEL maintainer "LLVM Developers"
+FROM nvidia/cuda:12.6.3-devel-ubuntu24.04 AS builder
+LABEL maintainer="LLVM Developers"
 # Install llvm build dependencies.
 RUN apt-get update && \
-    apt-get install -y --no-install-recommends ca-certificates cmake python \
+    apt-get install -y --no-install-recommends ca-certificates cmake 2to3 python-is-python3 \
         subversion ninja-build git && \
     rm -rf /var/lib/apt/lists/*
 
@@ -26,8 +26,8 @@ RUN /tmp/scripts/build_install_llvm.sh --to /tmp/clang-install ${buildscript_arg
 
 
 # Stage 2. Produce a minimal release image with build results.
-FROM nvidia/cuda:8.0-devel
-LABEL maintainer "LLVM Developers"
+FROM nvidia/cuda:12.6.3-devel-ubuntu24.04
+LABEL maintainer="LLVM Developers"
 # Copy clang installation into this container.
 COPY --from=builder /tmp/clang-install/ /usr/local/
 # C++ standard library and binutils are already included in the base package.
diff --git a/llvm/utils/docker/scripts/checkout.sh b/llvm/utils/docker/scripts/checkout.sh
index 8fe7c6141d86b..677fa61e6ae51 100755
--- a/llvm/utils/docker/scripts/checkout.sh
+++ b/llvm/utils/docker/scripts/checkout.sh
@@ -18,7 +18,7 @@ Checkout git sources into /tmp/clang-build/src. Used inside a docker container.
 Available options:
   -h|--help           show this help message
   -b|--branch         git branch to checkout, i.e. 'main',
-                      'release/10.x'
+                      'release/19.x'
                       (default: 'main')
   -r|--revision       git revision to checkout
   -c|--cherrypick     revision to cherry-pick. Can be specified multiple times.
@@ -83,7 +83,7 @@ function apply_cherrypicks() {
   # This function is always called on a sorted list of cherrypicks.
   for CHERRY_REV in $CHERRYPICKS; do
     echo "Cherry-picking $CHERRY_REV into $CHECKOUT_DIR"
-    EMAIL="someone@somewhere.net" git cherry-pick $CHERRY_REV
+    EMAIL="someone@somewhere.net" git cherry-pick "$CHERRY_REV"
   done
 
   popd
@@ -97,7 +97,7 @@ mkdir -p "$CLANG_BUILD_DIR/src"
 CHECKOUT_DIR="$CLANG_BUILD_DIR/src"
 
 echo "Checking out https://github.com/llvm/llvm-project.git to $CHECKOUT_DIR"
-git clone -b $LLVM_BRANCH --single-branch \
+git clone -b "$LLVM_BRANCH" --single-branch \
   "https://github.com/llvm/llvm-project.git" \
   "$CHECKOUT_DIR"
 
@@ -114,7 +114,7 @@ CHECKSUMS_FILE="/tmp/checksums/checksums.txt"
 
 if [ -f "$CHECKSUMS_FILE" ]; then
   echo "Validating checksums for LLVM checkout..."
-  python "$(dirname $0)/llvm_checksum/llvm_checksum.py" -c "$CHECKSUMS_FILE" \
+  python "$(dirname "$0")/llvm_checksum/llvm_checksum.py" -c "$CHECKSUMS_FILE" \
     --partial --multi_dir "$CLANG_BUILD_DIR/src"
 else
   echo "Skipping checksumming checks..."
diff --git a/mlir/cmake/modules/AddMLIR.cmake b/mlir/cmake/modules/AddMLIR.cmake
index 4933cafa41ed6..a917443f4cbaf 100644
--- a/mlir/cmake/modules/AddMLIR.cmake
+++ b/mlir/cmake/modules/AddMLIR.cmake
@@ -305,7 +305,9 @@ endfunction()
 # EXCLUDE_FROM_LIBMLIR
 #   Don't include this library in libMLIR.so.  This option should be used
 #   for test libraries, executable-specific libraries, or rarely used libraries
-#   with large dependencies.
+#   with large dependencies.  When using it, please link libraries included
+#   in libMLIR via mlir_target_link_libraries(), to ensure that the library
+#   does not pull in static dependencies when MLIR_LINK_MLIR_DYLIB=ON is used.
 # OBJECT
 #   The library's object library is referenced using "obj.${name}". For this to
 #   work reliably, this flag ensures that the OBJECT library exists.
diff --git a/mlir/cmake/modules/AddMLIRPython.cmake b/mlir/cmake/modules/AddMLIRPython.cmake
index 404002e03b51b..3372e74ceb7aa 100644
--- a/mlir/cmake/modules/AddMLIRPython.cmake
+++ b/mlir/cmake/modules/AddMLIRPython.cmake
@@ -610,7 +610,6 @@ function(add_mlir_python_sources_target name)
 
       add_custom_command(
         OUTPUT "${_dest_path}"
-        PRE_BUILD
         COMMENT "Copying python source ${_src_path} -> ${_dest_path}"
         DEPENDS "${_src_path}"
         COMMAND "${CMAKE_COMMAND}" -E ${_link_or_copy}
@@ -707,7 +706,7 @@ function(add_mlir_python_extension libname extname)
           ${eh_rtti_enable}
       )
     endif()
-    
+
     if(APPLE)
       # NanobindAdaptors.h uses PyClassMethod_New to build `pure_subclass`es but nanobind
       # doesn't declare this API as undefined in its linker flags. So we need to declare it as such
diff --git a/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h b/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h
index cf0c96f0eba00..312f1fc5b20c9 100644
--- a/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h
+++ b/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h
@@ -62,10 +62,10 @@ struct FunctionCallBuilder {
 
 /// Collect a set of patterns to convert from the GPU dialect to LLVM and
 /// populate converter for gpu types.
-void populateGpuToLLVMConversionPatterns(LLVMTypeConverter &converter,
-                                         RewritePatternSet &patterns,
-                                         bool kernelBarePtrCallConv = false,
-                                         bool typeCheckKernelArgs = false);
+void populateGpuToLLVMConversionPatterns(
+    LLVMTypeConverter &converter, RewritePatternSet &patterns,
+    bool kernelBarePtrCallConv = false,
+    bool kernelIntersperseSizeCallConv = false);
 
 /// A function that maps a MemorySpace enum to a target-specific integer value.
 using MemorySpaceMapping = std::function<unsigned(gpu::AddressSpace)>;
diff --git a/mlir/include/mlir/Conversion/GPUToVulkan/ConvertGPUToVulkanPass.h b/mlir/include/mlir/Conversion/GPUToVulkan/ConvertGPUToVulkanPass.h
deleted file mode 100644
index f69720328f2a4..0000000000000
--- a/mlir/include/mlir/Conversion/GPUToVulkan/ConvertGPUToVulkanPass.h
+++ /dev/null
@@ -1,36 +0,0 @@
-//===- ConvertGPUToVulkanPass.h - GPU to Vulkan conversion pass -*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// The file declares a pass to convert GPU dialect ops to to Vulkan runtime
-// calls.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef MLIR_CONVERSION_GPUTOVULKAN_CONVERTGPUTOVULKANPASS_H
-#define MLIR_CONVERSION_GPUTOVULKAN_CONVERTGPUTOVULKANPASS_H
-
-#include "mlir/Support/LLVM.h"
-
-#include <memory>
-
-namespace mlir {
-
-class ModuleOp;
-template <typename T>
-class OperationPass;
-class Pass;
-
-#define GEN_PASS_DECL_CONVERTVULKANLAUNCHFUNCTOVULKANCALLSPASS
-#define GEN_PASS_DECL_CONVERTGPULAUNCHFUNCTOVULKANLAUNCHFUNC
-#include "mlir/Conversion/Passes.h.inc"
-
-std::unique_ptr<OperationPass<mlir::ModuleOp>>
-createConvertGpuLaunchFuncToVulkanLaunchFuncPass();
-
-} // namespace mlir
-#endif // MLIR_CONVERSION_GPUTOVULKAN_CONVERTGPUTOVULKANPASS_H
diff --git a/mlir/include/mlir/Conversion/Passes.h b/mlir/include/mlir/Conversion/Passes.h
index ea05a5937fa6a..4b75c3f21ecd7 100644
--- a/mlir/include/mlir/Conversion/Passes.h
+++ b/mlir/include/mlir/Conversion/Passes.h
@@ -39,7 +39,6 @@
 #include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
 #include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h"
 #include "mlir/Conversion/GPUToSPIRV/GPUToSPIRVPass.h"
-#include "mlir/Conversion/GPUToVulkan/ConvertGPUToVulkanPass.h"
 #include "mlir/Conversion/IndexToLLVM/IndexToLLVM.h"
 #include "mlir/Conversion/IndexToSPIRV/IndexToSPIRV.h"
 #include "mlir/Conversion/LinalgToStandard/LinalgToStandard.h"
diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index 86c04fa1dab17..177698f86d14a 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -518,11 +518,13 @@ def GpuToLLVMConversionPass : Pass<"gpu-to-llvm", "ModuleOp"> {
              "Use bare pointers to pass memref arguments to kernels. "
              "The kernel must use the same setting for this option."
           >,
-    Option<"typeCheckKernelArgs", "type-check-kernel-args", "bool",
+    Option<"kernelIntersperseSizeCallConv", "intersperse-sizes-for-kernels", "bool",
            /*default=*/"false",
-             "Require all kernel arguments to be memrefs of rank 1 and with a "
-             "32-bit element size. This is a temporary option that will be "
-             "removed; TODO(https://github.com/llvm/llvm-project/issues/73457)."
+           "Inserts a size_t argument following each memref argument, "
+           "containing the static size in bytes of the buffer. Incompatible "
+           "arguments are rejected. This is intended for use by the Vulkan "
+           "runtime with the kernel bare pointer calling convention, to enable "
+           "dynamic binding of buffers as arguments without static type info."
           >
   ];
 
@@ -652,31 +654,6 @@ def ConvertGPUToSPIRV : Pass<"convert-gpu-to-spirv", "ModuleOp"> {
   ];
 }
 
-//===----------------------------------------------------------------------===//
-// GPUToVulkan
-//===----------------------------------------------------------------------===//
-
-def ConvertGpuLaunchFuncToVulkanLaunchFunc
-    : Pass<"convert-gpu-launch-to-vulkan-launch", "ModuleOp"> {
-  let summary = "Convert gpu.launch_func to vulkanLaunch external call";
-  let description = [{
-    This pass is only intended for the mlir-vulkan-runner.
-  }];
-  let constructor = "mlir::createConvertGpuLaunchFuncToVulkanLaunchFuncPass()";
-  let dependentDialects = ["spirv::SPIRVDialect"];
-}
-
-def ConvertVulkanLaunchFuncToVulkanCallsPass
-    : Pass<"launch-func-to-vulkan", "ModuleOp"> {
-  let summary = "Convert vulkanLaunch external call to Vulkan runtime external "
-                "calls";
-  let description = [{
-    This pass is only intended for the mlir-vulkan-runner.
-  }];
-
-  let dependentDialects = ["LLVM::LLVMDialect"];
-}
-
 //===----------------------------------------------------------------------===//
 // ConvertIndexToLLVMPass
 //===----------------------------------------------------------------------===//
@@ -1487,7 +1464,11 @@ def ConvertVectorToLLVMPass : Pass<"convert-vector-to-llvm"> {
     Option<"x86Vector", "enable-x86vector",
            "bool", /*default=*/"false",
            "Enables the use of X86Vector dialect while lowering the vector "
-	   "dialect.">
+	   "dialect.">,
+    Option<"vectorTransformsOptions", "vector-transform-options",
+           "vector::VectorTransformsOptions",
+           /*default=*/"vector::VectorTransformsOptions()",
+           "Options to lower some operations like contractions and transposes.">,
   ];
 }
 
diff --git a/mlir/include/mlir/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.h b/mlir/include/mlir/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.h
index 4661d31b6364d..410b881db7959 100644
--- a/mlir/include/mlir/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.h
+++ b/mlir/include/mlir/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.h
@@ -9,6 +9,7 @@
 #define MLIR_CONVERSION_VECTORTOLLVM_CONVERTVECTORTOLLVMPASS_H_
 
 #include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
+#include "mlir/Dialect/Vector/Transforms/VectorTransforms.h"
 
 namespace mlir {
 class Pass;
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index 71dac3ad39b7b..e9e62a74237c4 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -379,7 +379,6 @@ def ROCDL_mfma_i32_16x16x32_i8 : ROCDL_Mfma_IntrOp<"mfma.i32.16x16x32.i8">;
 def ROCDL_mfma_i32_32x32x16_i8 : ROCDL_Mfma_IntrOp<"mfma.i32.32x32x16.i8">;
 def ROCDL_mfma_f32_16x16x8_xf32 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x8.xf32">;
 def ROCDL_mfma_f32_32x32x4_xf32 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x4.xf32">;
-// fp8, only on gfx940
 def ROCDL_mfma_f32_16x16x32_bf8_bf8 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.bf8.bf8">;
 def ROCDL_mfma_f32_16x16x32_bf8_fp8 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.bf8.fp8">;
 def ROCDL_mfma_f32_16x16x32_fp8_bf8 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.fp8.bf8">;
@@ -388,6 +387,13 @@ def ROCDL_mfma_f32_32x32x16_bf8_bf8 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.bf8.b
 def ROCDL_mfma_f32_32x32x16_bf8_fp8 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.bf8.fp8">;
 def ROCDL_mfma_f32_32x32x16_fp8_bf8 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.fp8.bf8">;
 def ROCDL_mfma_f32_32x32x16_fp8_fp8 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.fp8.fp8">;
+// New in gfx950.
+def ROCDL_mfma_f32_16x16x32_bf16 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.bf16">;
+def ROCDL_mfma_i32_16x16x64_i8 : ROCDL_Mfma_IntrOp<"mfma.i32.16x16x64.i8">;
+def ROCDL_mfma_f32_16x16x32_f16 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.f16">;
+def ROCDL_mfma_f32_32x32x16_bf16 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.bf16">;
+def ROCDL_mfma_i32_32x32x32_i8 : ROCDL_Mfma_IntrOp<"mfma.i32.32x32x32.i8">;
+def ROCDL_mfma_f32_32x32x16_f16 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.f16">;
 
 //===---------------------------------------------------------------------===//
 // WMMA intrinsics
@@ -412,6 +418,36 @@ def ROCDL_wmma_i32_16x16x16_iu4 : ROCDL_Wmma_IntrOp<"wmma.i32.16x16x16.iu4", [1]
 def ROCDL_wmma_f32_16x16x16_fp8 : ROCDL_Wmma_IntrOp<"wmma.f32.16x16x16.fp8_fp8", [1]>;
 def ROCDL_wmma_f32_16x16x16_bf8 : ROCDL_Wmma_IntrOp<"wmma.f32.16x16x16.bf8_bf8", [1]>;
 
+//===---------------------------------------------------------------------===//
+// LDS transpose intrinsics (available in GFX950)
+
+def ROCDLGlobalBuffer : LLVM_PointerInAddressSpace<1>;
+def ROCDLBufferLDS : LLVM_PointerInAddressSpace<3>;
+
+class ROCDL_LDS_Read_Tr_IntrOp<string mnemonic> :
+  ROCDL_IntrOp<mnemonic, [1], [], [], 1>,
+  Arguments<(ins Arg<ROCDLBufferLDS, "", [MemRead]>:$ptr)>{
+  let assemblyFormat = "$ptr attr-dict `:` type($ptr) `->` type($res)";
+}
+
+def ROCDL_ds_read_tr4_b64 : ROCDL_LDS_Read_Tr_IntrOp<"ds.read.tr4.b64">;
+def ROCDL_ds_read_tr8_b64 : ROCDL_LDS_Read_Tr_IntrOp<"ds.read.tr8.b64">;
+def ROCDL_ds_read_tr6_b96 : ROCDL_LDS_Read_Tr_IntrOp<"ds.read.tr6.b96">;
+def ROCDL_ds_read_tr16_b64 : ROCDL_LDS_Read_Tr_IntrOp<"ds.read.tr16.b64">;
+
+//===---------------------------------------------------------------------===//
+// Global load to LDS intrinsic (available in GFX950)
+
+def ROCDL_GlobalLoadLDSOp :
+  ROCDL_IntrOp<"global.load.lds", [], [], [], 0>,
+  Arguments<(ins Arg<ROCDLGlobalBuffer, "", [MemRead]>:$globalPtr,
+                 Arg<ROCDLBufferLDS, "", [MemWrite]>:$ldsPtr,
+                 I32:$size,
+                 I32:$offset,
+                 I32:$aux)> {
+  let assemblyFormat = "operands attr-dict";
+}
+
 //===---------------------------------------------------------------------===//
 // Operations on raw buffer resources (stride of 0, bounds checks either off or in
 // raw buffer mode).
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index 995524a4472bc..68eed7feb184d 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -699,11 +699,11 @@ FailureOr<GenericOp> interchangeGenericOp(RewriterBase &rewriter,
                                           GenericOp genericOp,
                                           ArrayRef<unsigned> interchangeVector);
 
-/// Create a GenericOp from the given named operation `namedOp` and replace
-/// namedOp.
-/// Return failure if `namedOp` is a GenericOp or misses a region builder.
+/// Create a GenericOp from the given named operation `linalgOp` and replace
+/// `linalgOp` with it.
+/// Return failure if `linalgOp` is a GenericOp or lacks a region builder.
 FailureOr<GenericOp> generalizeNamedOp(RewriterBase &rewriter,
-                                       LinalgOp namedOp);
+                                       LinalgOp linalgOp);
 
 /// Create a namedOp from the given GenericOp and replace the GenericOp.
 /// Currently we can specialize only trivial linalg copy operations.
diff --git a/mlir/include/mlir/IR/CommonTypeConstraints.td b/mlir/include/mlir/IR/CommonTypeConstraints.td
index 5ec995b3ae977..e592910303568 100644
--- a/mlir/include/mlir/IR/CommonTypeConstraints.td
+++ b/mlir/include/mlir/IR/CommonTypeConstraints.td
@@ -329,31 +329,31 @@ def F64 : F<64>;
 def F80 : F<80>;
 def F128 : F<128>;
 
-def BF16 : Type<CPred<"::llvm::isa<BFloat16Type>($_self)">, "bfloat16 type">,
+def BF16 : Type<CPred<"::llvm::isa<::mlir::BFloat16Type>($_self)">, "bfloat16 type">,
            BuildableType<"$_builder.getType<BFloat16Type>()">;
-def TF32 : Type<CPred<"::llvm::isa<FloatTF32Type>($_self)">, "tf32 type">,
+def TF32 : Type<CPred<"::llvm::isa<::mlir::FloatTF32Type>($_self)">, "tf32 type">,
            BuildableType<"$_builder.getType<FloatTF32Type>()">;
-def F8E4M3FN : Type<CPred<"::llvm::isa<Float8E4M3FNType>($_self)">, "f8E4M3FN type">,
+def F8E4M3FN : Type<CPred<"::llvm::isa<::mlir::Float8E4M3FNType>($_self)">, "f8E4M3FN type">,
                BuildableType<"$_builder.getType<Float8E4M3FNType>()">;
-def F8E5M2 : Type<CPred<"::llvm::isa<Float8E5M2Type>($_self)">, "f8E5M2 type">,
+def F8E5M2 : Type<CPred<"::llvm::isa<::mlir::Float8E5M2Type>($_self)">, "f8E5M2 type">,
              BuildableType<"$_builder.getType<Float8E5M2Type>()">;
-def F8E4M3 : Type<CPred<"::llvm::isa<Float8E4M3Type>($_self)">, "f8E4M3 type">,
+def F8E4M3 : Type<CPred<"::llvm::isa<::mlir::Float8E4M3Type>($_self)">, "f8E4M3 type">,
              BuildableType<"$_builder.getType<Float8E4M3Type>()">;
-def F8E4M3FNUZ : Type<CPred<"::llvm::isa<Float8E4M3FNUZType>($_self)">, "f8E4M3FNUZ type">,
+def F8E4M3FNUZ : Type<CPred<"::llvm::isa<::mlir::Float8E4M3FNUZType>($_self)">, "f8E4M3FNUZ type">,
                  BuildableType<"$_builder.getType<Float8E4M3FNUZType>()">;
-def F8E4M3B11FNUZ : Type<CPred<"::llvm::isa<Float8E4M3B11FNUZType>($_self)">, "f8E4M3B11FNUZ type">,
+def F8E4M3B11FNUZ : Type<CPred<"::llvm::isa<::mlir::Float8E4M3B11FNUZType>($_self)">, "f8E4M3B11FNUZ type">,
                  BuildableType<"$_builder.getType<Float8E4M3B11FNUZType>()">;
-def F8E5M2FNUZ : Type<CPred<"::llvm::isa<Float8E5M2FNUZType>($_self)">, "f8E5M2FNUZ type">,
+def F8E5M2FNUZ : Type<CPred<"::llvm::isa<::mlir::Float8E5M2FNUZType>($_self)">, "f8E5M2FNUZ type">,
                  BuildableType<"$_builder.getType<Float8E5M2FNUZType>()">;
-def F8E3M4 : Type<CPred<"::llvm::isa<Float8E3M4Type>($_self)">, "f8E3M4 type">,
+def F8E3M4 : Type<CPred<"::llvm::isa<::mlir::Float8E3M4Type>($_self)">, "f8E3M4 type">,
              BuildableType<"$_builder.getType<Float8E3M4Type>()">;
-def F4E2M1FN : Type<CPred<"::llvm::isa<Float4E2M1FNType>($_self)">, "f4E2M1FN type">,
+def F4E2M1FN : Type<CPred<"::llvm::isa<::mlir::Float4E2M1FNType>($_self)">, "f4E2M1FN type">,
                BuildableType<"$_builder.getType<Float4E2M1FNType>()">;
-def F6E2M3FN : Type<CPred<"::llvm::isa<Float6E2M3FNType>($_self)">, "f6E2M3FN type">,
+def F6E2M3FN : Type<CPred<"::llvm::isa<::mlir::Float6E2M3FNType>($_self)">, "f6E2M3FN type">,
                BuildableType<"$_builder.getType<Float6E2M3FNType>()">;
-def F6E3M2FN : Type<CPred<"::llvm::isa<Float6E3M2FNType($_self)">, "f6E3M2FN type">,
+def F6E3M2FN : Type<CPred<"::llvm::isa<::mlir::Float6E3M2FNType>($_self)">, "f6E3M2FN type">,
                BuildableType<"$_builder.getType<Float6E3M2FNType>()">;
-def F8E8M0FNU : Type<CPred<"::llvm::isa<Float8E8M0FNUType>($_self)">, "f8E8M0FNU type">,
+def F8E8M0FNU : Type<CPred<"::llvm::isa<::mlir::Float8E8M0FNUType>($_self)">, "f8E8M0FNU type">,
                 BuildableType<"$_builder.getType<Float8E8M0FNUType>()">;
 
 def AnyComplex : Type<CPred<"::llvm::isa<::mlir::ComplexType>($_self)">,
diff --git a/mlir/lib/Bindings/Python/IRAttributes.cpp b/mlir/lib/Bindings/Python/IRAttributes.cpp
index 03566fc11b5f9..d3ceb3d435c1c 100644
--- a/mlir/lib/Bindings/Python/IRAttributes.cpp
+++ b/mlir/lib/Bindings/Python/IRAttributes.cpp
@@ -845,7 +845,7 @@ class PyDenseElementsAttribute
       }
       shapedType = *explicitType;
     } else {
-      SmallVector<int64_t> shape{static_cast<int64_t>(numAttributes)};
+      SmallVector<int64_t> shape = {static_cast<int64_t>(numAttributes)};
       shapedType = mlirRankedTensorTypeGet(
           shape.size(), shape.data(),
           mlirAttributeGetType(pyTryCast<PyAttribute>(attributes[0])),
diff --git a/mlir/lib/CAPI/ExecutionEngine/CMakeLists.txt b/mlir/lib/CAPI/ExecutionEngine/CMakeLists.txt
index 0be8f2af5dcf4..bf7dff897ab6d 100644
--- a/mlir/lib/CAPI/ExecutionEngine/CMakeLists.txt
+++ b/mlir/lib/CAPI/ExecutionEngine/CMakeLists.txt
@@ -1,8 +1,10 @@
 set(LLVM_LINK_COMPONENTS
   nativecodegen
   native
+  orcjit
+  support
 )
-  
+
 # Main API shared library.
 add_mlir_upstream_c_api_library(MLIRCAPIExecutionEngine
   ExecutionEngine.cpp
diff --git a/mlir/lib/Conversion/CMakeLists.txt b/mlir/lib/Conversion/CMakeLists.txt
index 07ccf8e2dbba0..1ad0470fa8c97 100644
--- a/mlir/lib/Conversion/CMakeLists.txt
+++ b/mlir/lib/Conversion/CMakeLists.txt
@@ -28,7 +28,6 @@ add_subdirectory(GPUToLLVMSPV)
 add_subdirectory(GPUToNVVM)
 add_subdirectory(GPUToROCDL)
 add_subdirectory(GPUToSPIRV)
-add_subdirectory(GPUToVulkan)
 add_subdirectory(IndexToLLVM)
 add_subdirectory(IndexToSPIRV)
 add_subdirectory(LinalgToStandard)
diff --git a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
index ca9883a79dc16..8017eb6bb383b 100644
--- a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
+++ b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
@@ -428,10 +428,10 @@ class LegalizeLaunchFuncOpPattern
 public:
   LegalizeLaunchFuncOpPattern(const LLVMTypeConverter &typeConverter,
                               bool kernelBarePtrCallConv,
-                              bool typeCheckKernelArgs)
+                              bool kernelIntersperseSizeCallConv)
       : ConvertOpToGpuRuntimeCallPattern<gpu::LaunchFuncOp>(typeConverter),
         kernelBarePtrCallConv(kernelBarePtrCallConv),
-        typeCheckKernelArgs(typeCheckKernelArgs) {}
+        kernelIntersperseSizeCallConv(kernelIntersperseSizeCallConv) {}
 
 private:
   LogicalResult
@@ -439,7 +439,7 @@ class LegalizeLaunchFuncOpPattern
                   ConversionPatternRewriter &rewriter) const override;
 
   bool kernelBarePtrCallConv;
-  bool typeCheckKernelArgs;
+  bool kernelIntersperseSizeCallConv;
 };
 
 /// A rewrite pattern to convert gpu.memcpy operations into a GPU runtime
@@ -566,8 +566,9 @@ void GpuToLLVMConversionPass::runOnOperation() {
   populateFinalizeMemRefToLLVMConversionPatterns(converter, patterns);
   populateAsyncStructuralTypeConversionsAndLegality(converter, patterns,
                                                     target);
-  populateGpuToLLVMConversionPatterns(
-      converter, patterns, kernelBarePtrCallConv, typeCheckKernelArgs);
+  populateGpuToLLVMConversionPatterns(converter, patterns,
+                                      kernelBarePtrCallConv,
+                                      kernelIntersperseSizeCallConv);
 
   if (failed(
           applyPartialConversion(getOperation(), target, std::move(patterns))))
@@ -970,33 +971,55 @@ LogicalResult LegalizeLaunchFuncOpPattern::matchAndRewrite(
   else if (launchOp.getAsyncToken())
     stream = streamCreateCallBuilder.create(loc, rewriter, {}).getResult();
 
-  if (typeCheckKernelArgs) {
-    // The current non-bare-pointer ABI is a bad fit for `mgpuLaunchKernel`,
-    // which takes an untyped list of arguments. The type check here prevents
-    // accidentally violating the assumption made in vulkan-runtime-wrappers.cpp
-    // and creating a unchecked runtime ABI mismatch.
-    // TODO(https://github.com/llvm/llvm-project/issues/73457): Change the ABI
-    // here to remove the need for this type check.
-    for (Value arg : launchOp.getKernelOperands()) {
-      if (auto memrefTy = dyn_cast<MemRefType>(arg.getType())) {
-        if (memrefTy.getRank() != 1 ||
-            memrefTy.getElementTypeBitWidth() != 32) {
-          return rewriter.notifyMatchFailure(
-              launchOp, "Operand to launch op is not a rank-1 memref with "
-                        "32-bit element type.");
-        }
-      } else {
+  // Lower the kernel operands to match kernel parameters.
+  // Note: If `useBarePtrCallConv` is set in the type converter's options,
+  // the value of `kernelBarePtrCallConv` will be ignored.
+  OperandRange origArguments = launchOp.getKernelOperands();
+  SmallVector<Value, 8> llvmArguments = getTypeConverter()->promoteOperands(
+      loc, origArguments, adaptor.getKernelOperands(), rewriter,
+      /*useBarePtrCallConv=*/kernelBarePtrCallConv);
+  SmallVector<Value, 8> llvmArgumentsWithSizes;
+
+  // Intersperse size information if requested.
+  if (kernelIntersperseSizeCallConv) {
+    if (origArguments.size() != llvmArguments.size()) {
+      // This shouldn't happen if the bare-pointer calling convention is used.
+      return rewriter.notifyMatchFailure(
+          launchOp,
+          "Cannot add sizes to arguments with one-to-many LLVM IR expansion.");
+    }
+
+    llvmArgumentsWithSizes.reserve(llvmArguments.size() * 2);
+    for (auto [llvmArg, origArg] : zip_equal(llvmArguments, origArguments)) {
+      auto memrefTy = dyn_cast<MemRefType>(origArg.getType());
+      if (!memrefTy) {
         return rewriter.notifyMatchFailure(
             launchOp, "Operand to launch op is not a memref.");
       }
+
+      if (!memrefTy.hasStaticShape() ||
+          !memrefTy.getElementType().isIntOrFloat()) {
+        return rewriter.notifyMatchFailure(
+            launchOp, "Operand to launch op is not a memref with a static "
+                      "shape and an integer or float element type.");
+      }
+
+      unsigned bitwidth = memrefTy.getElementTypeBitWidth();
+      if (bitwidth % 8 != 0) {
+        return rewriter.notifyMatchFailure(
+            launchOp, "Operand to launch op is not a memref with a "
+                      "byte-aligned element type.");
+      }
+
+      uint64_t staticSize = static_cast<uint64_t>(bitwidth / 8) *
+                            static_cast<uint64_t>(memrefTy.getNumElements());
+
+      Value sizeArg = rewriter.create<LLVM::ConstantOp>(
+          loc, getIndexType(), rewriter.getIndexAttr(staticSize));
+      llvmArgumentsWithSizes.push_back(llvmArg); // Presumably a bare pointer.
+      llvmArgumentsWithSizes.push_back(sizeArg);
     }
   }
-  // Lower the kernel operands to match kernel parameters.
-  // Note: If `useBarePtrCallConv` is set in the type converter's options,
-  // the value of `kernelBarePtrCallConv` will be ignored.
-  SmallVector<Value, 4> arguments = getTypeConverter()->promoteOperands(
-      loc, launchOp.getKernelOperands(), adaptor.getKernelOperands(), rewriter,
-      /*useBarePtrCallConv=*/kernelBarePtrCallConv);
 
   std::optional<gpu::KernelDim3> clusterSize = std::nullopt;
   if (launchOp.hasClusterSize()) {
@@ -1010,7 +1033,9 @@ LogicalResult LegalizeLaunchFuncOpPattern::matchAndRewrite(
                       adaptor.getGridSizeZ()},
       gpu::KernelDim3{adaptor.getBlockSizeX(), adaptor.getBlockSizeY(),
                       adaptor.getBlockSizeZ()},
-      adaptor.getDynamicSharedMemorySize(), arguments, stream, clusterSize);
+      adaptor.getDynamicSharedMemorySize(),
+      llvmArgumentsWithSizes.empty() ? llvmArguments : llvmArgumentsWithSizes,
+      stream, clusterSize);
   if (launchOp.getAsyncToken())
     rewriter.replaceOp(launchOp, {stream});
   else
@@ -1760,10 +1785,9 @@ LogicalResult ConvertCreateBsrOpToGpuRuntimeCallPattern::matchAndRewrite(
   return success();
 }
 
-void mlir::populateGpuToLLVMConversionPatterns(LLVMTypeConverter &converter,
-                                               RewritePatternSet &patterns,
-                                               bool kernelBarePtrCallConv,
-                                               bool typeCheckKernelArgs) {
+void mlir::populateGpuToLLVMConversionPatterns(
+    LLVMTypeConverter &converter, RewritePatternSet &patterns,
+    bool kernelBarePtrCallConv, bool kernelIntersperseSizeCallConv) {
   addOpaquePointerConversion<gpu::AsyncTokenType>(converter);
   addOpaquePointerConversion<gpu::SparseDnTensorHandleType>(converter);
   addOpaquePointerConversion<gpu::SparseSpMatHandleType>(converter);
@@ -1801,7 +1825,7 @@ void mlir::populateGpuToLLVMConversionPatterns(LLVMTypeConverter &converter,
                ConvertSpMatGetSizeOpToGpuRuntimeCallPattern,
                ConvertSetCsrPointersOpToGpuRuntimeCallPattern>(converter);
   patterns.add<LegalizeLaunchFuncOpPattern>(converter, kernelBarePtrCallConv,
-                                            typeCheckKernelArgs);
+                                            kernelIntersperseSizeCallConv);
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Conversion/GPUToVulkan/CMakeLists.txt b/mlir/lib/Conversion/GPUToVulkan/CMakeLists.txt
deleted file mode 100644
index faeb32f2bc8cd..0000000000000
--- a/mlir/lib/Conversion/GPUToVulkan/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-add_mlir_conversion_library(MLIRGPUToVulkanTransforms
-  ConvertLaunchFuncToVulkanCalls.cpp
-  ConvertGPULaunchFuncToVulkanLaunchFunc.cpp
-
-  DEPENDS
-  MLIRConversionPassIncGen
-
-  LINK_LIBS PUBLIC
-  MLIRFuncDialect
-  MLIRGPUDialect
-  MLIRIR
-  MLIRLLVMDialect
-  MLIRPass
-  MLIRSPIRVDialect
-  MLIRSPIRVSerialization
-  MLIRSupport
-  MLIRTransforms
-  MLIRTranslateLib
-  )
diff --git a/mlir/lib/Conversion/GPUToVulkan/ConvertGPULaunchFuncToVulkanLaunchFunc.cpp b/mlir/lib/Conversion/GPUToVulkan/ConvertGPULaunchFuncToVulkanLaunchFunc.cpp
deleted file mode 100644
index 8488fac69e8e3..0000000000000
--- a/mlir/lib/Conversion/GPUToVulkan/ConvertGPULaunchFuncToVulkanLaunchFunc.cpp
+++ /dev/null
@@ -1,219 +0,0 @@
-//===- ConvertGPULaunchFuncToVulkanLaunchFunc.cpp - MLIR conversion pass --===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a pass to convert gpu launch function into a vulkan
-// launch function. Extracts the SPIR-V from a `gpu::BinaryOp` and attaches it
-// along with the entry point name as attributes to a Vulkan launch call op.
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Conversion/GPUToVulkan/ConvertGPUToVulkanPass.h"
-
-#include "mlir/Dialect/Func/IR/FuncOps.h"
-#include "mlir/Dialect/GPU/IR/GPUDialect.h"
-#include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
-#include "mlir/Dialect/SPIRV/IR/SPIRVOps.h"
-#include "mlir/IR/Attributes.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/IR/BuiltinOps.h"
-#include "mlir/IR/BuiltinTypes.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Target/SPIRV/Serialization.h"
-
-namespace mlir {
-#define GEN_PASS_DEF_CONVERTGPULAUNCHFUNCTOVULKANLAUNCHFUNC
-#include "mlir/Conversion/Passes.h.inc"
-} // namespace mlir
-
-using namespace mlir;
-
-static constexpr const char *kSPIRVBlobAttrName = "spirv_blob";
-static constexpr const char *kSPIRVEntryPointAttrName = "spirv_entry_point";
-static constexpr const char *kSPIRVElementTypesAttrName = "spirv_element_types";
-static constexpr const char *kVulkanLaunch = "vulkanLaunch";
-
-namespace {
-
-/// A pass to convert gpu launch op to vulkan launch call op, by extracting a
-/// SPIR-V binary shader from a `gpu::BinaryOp` and attaching binary data and
-/// entry point name as an attributes to created vulkan launch call op.
-class ConvertGpuLaunchFuncToVulkanLaunchFunc
-    : public impl::ConvertGpuLaunchFuncToVulkanLaunchFuncBase<
-          ConvertGpuLaunchFuncToVulkanLaunchFunc> {
-public:
-  void runOnOperation() override;
-
-private:
-  /// Extracts a SPIR-V binary shader from the given `module`, if any.
-  /// Note that this also removes the binary from the IR.
-  FailureOr<StringAttr> getBinaryShader(ModuleOp module);
-
-  /// Converts the given `launchOp` to vulkan launch call.
-  void convertGpuLaunchFunc(gpu::LaunchFuncOp launchOp);
-
-  /// Checks where the given type is supported by Vulkan runtime.
-  bool isSupportedType(Type type) {
-    if (auto memRefType = dyn_cast_or_null<MemRefType>(type)) {
-      auto elementType = memRefType.getElementType();
-      return memRefType.hasRank() &&
-             (memRefType.getRank() >= 1 && memRefType.getRank() <= 3) &&
-             (elementType.isIntOrFloat());
-    }
-    return false;
-  }
-
-  /// Declares the vulkan launch function. Returns an error if the any type of
-  /// operand is unsupported by Vulkan runtime.
-  LogicalResult declareVulkanLaunchFunc(Location loc,
-                                        gpu::LaunchFuncOp launchOp);
-
-private:
-  /// The number of vulkan launch configuration operands, placed at the leading
-  /// positions of the operand list.
-  static constexpr unsigned kVulkanLaunchNumConfigOperands = 3;
-};
-
-} // namespace
-
-void ConvertGpuLaunchFuncToVulkanLaunchFunc::runOnOperation() {
-  bool done = false;
-  getOperation().walk([this, &done](gpu::LaunchFuncOp op) {
-    if (done) {
-      op.emitError("should only contain one 'gpu::LaunchFuncOp' op");
-      return signalPassFailure();
-    }
-    done = true;
-    convertGpuLaunchFunc(op);
-  });
-
-  // Erase `gpu::GPUModuleOp` and `spirv::Module` operations.
-  for (auto gpuModule :
-       llvm::make_early_inc_range(getOperation().getOps<gpu::GPUModuleOp>()))
-    gpuModule.erase();
-
-  for (auto spirvModule :
-       llvm::make_early_inc_range(getOperation().getOps<spirv::ModuleOp>()))
-    spirvModule.erase();
-}
-
-LogicalResult ConvertGpuLaunchFuncToVulkanLaunchFunc::declareVulkanLaunchFunc(
-    Location loc, gpu::LaunchFuncOp launchOp) {
-  auto builder = OpBuilder::atBlockEnd(getOperation().getBody());
-
-  // Workgroup size is written into the kernel. So to properly modelling
-  // vulkan launch, we have to skip local workgroup size configuration here.
-  SmallVector<Type, 8> gpuLaunchTypes(launchOp.getOperandTypes());
-  // The first kVulkanLaunchNumConfigOperands of the gpu.launch_func op are the
-  // same as the config operands for the vulkan launch call op.
-  SmallVector<Type, 8> vulkanLaunchTypes(gpuLaunchTypes.begin(),
-                                         gpuLaunchTypes.begin() +
-                                             kVulkanLaunchNumConfigOperands);
-  vulkanLaunchTypes.append(gpuLaunchTypes.begin() +
-                               gpu::LaunchOp::kNumConfigOperands,
-                           gpuLaunchTypes.end());
-
-  // Check that all operands have supported types except those for the
-  // launch configuration.
-  for (auto type :
-       llvm::drop_begin(vulkanLaunchTypes, kVulkanLaunchNumConfigOperands)) {
-    if (!isSupportedType(type))
-      return launchOp.emitError() << type << " is unsupported to run on Vulkan";
-  }
-
-  // Declare vulkan launch function.
-  auto funcType = builder.getFunctionType(vulkanLaunchTypes, {});
-  builder.create<func::FuncOp>(loc, kVulkanLaunch, funcType).setPrivate();
-
-  return success();
-}
-
-FailureOr<StringAttr>
-ConvertGpuLaunchFuncToVulkanLaunchFunc::getBinaryShader(ModuleOp module) {
-  bool done = false;
-  StringAttr binaryAttr;
-  gpu::BinaryOp binaryToErase;
-  for (auto gpuBinary : module.getOps<gpu::BinaryOp>()) {
-    if (done)
-      return gpuBinary.emitError("should only contain one 'gpu.binary' op");
-    done = true;
-
-    ArrayRef<Attribute> objects = gpuBinary.getObjectsAttr().getValue();
-    if (objects.size() != 1)
-      return gpuBinary.emitError("should only contain a single object");
-
-    auto object = cast<gpu::ObjectAttr>(objects[0]);
-
-    if (!isa<spirv::TargetEnvAttr>(object.getTarget()))
-      return gpuBinary.emitError(
-          "should contain an object with a SPIR-V target environment");
-
-    binaryAttr = object.getObject();
-    binaryToErase = gpuBinary;
-  }
-  if (!done)
-    return module.emitError("should contain a 'gpu.binary' op");
-
-  // Remove the binary to avoid confusing later conversion passes.
-  binaryToErase.erase();
-  return binaryAttr;
-}
-
-void ConvertGpuLaunchFuncToVulkanLaunchFunc::convertGpuLaunchFunc(
-    gpu::LaunchFuncOp launchOp) {
-  ModuleOp module = getOperation();
-  OpBuilder builder(launchOp);
-  Location loc = launchOp.getLoc();
-
-  FailureOr<StringAttr> binaryAttr = getBinaryShader(module);
-  // Extract SPIR-V from `gpu.binary` op.
-  if (failed(binaryAttr))
-    return signalPassFailure();
-
-  // Declare vulkan launch function.
-  if (failed(declareVulkanLaunchFunc(loc, launchOp)))
-    return signalPassFailure();
-
-  SmallVector<Value, 8> gpuLaunchOperands(launchOp.getOperands());
-  SmallVector<Value, 8> vulkanLaunchOperands(
-      gpuLaunchOperands.begin(),
-      gpuLaunchOperands.begin() + kVulkanLaunchNumConfigOperands);
-  vulkanLaunchOperands.append(gpuLaunchOperands.begin() +
-                                  gpu::LaunchOp::kNumConfigOperands,
-                              gpuLaunchOperands.end());
-
-  // Create vulkan launch call op.
-  auto vulkanLaunchCallOp = builder.create<func::CallOp>(
-      loc, TypeRange{}, SymbolRefAttr::get(builder.getContext(), kVulkanLaunch),
-      vulkanLaunchOperands);
-
-  // Set SPIR-V binary shader data as an attribute.
-  vulkanLaunchCallOp->setAttr(kSPIRVBlobAttrName, *binaryAttr);
-
-  // Set entry point name as an attribute.
-  vulkanLaunchCallOp->setAttr(kSPIRVEntryPointAttrName,
-                              launchOp.getKernelName());
-
-  // Add MemRef element types before they're lost when lowering to LLVM.
-  SmallVector<Type> elementTypes;
-  for (Type type : llvm::drop_begin(launchOp.getOperandTypes(),
-                                    gpu::LaunchOp::kNumConfigOperands)) {
-    // The below cast always succeeds as it has already been verified in
-    // 'declareVulkanLaunchFunc' that these are MemRefs with compatible element
-    // types.
-    elementTypes.push_back(cast<MemRefType>(type).getElementType());
-  }
-  vulkanLaunchCallOp->setAttr(kSPIRVElementTypesAttrName,
-                              builder.getTypeArrayAttr(elementTypes));
-
-  launchOp.erase();
-}
-
-std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>>
-mlir::createConvertGpuLaunchFuncToVulkanLaunchFuncPass() {
-  return std::make_unique<ConvertGpuLaunchFuncToVulkanLaunchFunc>();
-}
diff --git a/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp b/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp
deleted file mode 100644
index 938db54963068..0000000000000
--- a/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp
+++ /dev/null
@@ -1,448 +0,0 @@
-//===- ConvertLaunchFuncToVulkanCalls.cpp - MLIR Vulkan conversion passes -===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a pass to convert vulkan launch call into a sequence of
-// Vulkan runtime calls. The Vulkan runtime API surface is huge so currently we
-// don't expose separate external functions in IR for each of them, instead we
-// expose a few external functions to wrapper libraries which manages Vulkan
-// runtime.
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Conversion/GPUToVulkan/ConvertGPUToVulkanPass.h"
-
-#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
-#include "mlir/IR/Attributes.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/IR/BuiltinOps.h"
-#include "mlir/Pass/Pass.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/Support/FormatVariadic.h"
-
-namespace mlir {
-#define GEN_PASS_DEF_CONVERTVULKANLAUNCHFUNCTOVULKANCALLSPASS
-#include "mlir/Conversion/Passes.h.inc"
-} // namespace mlir
-
-using namespace mlir;
-
-static constexpr const char *kCInterfaceVulkanLaunch =
-    "_mlir_ciface_vulkanLaunch";
-static constexpr const char *kDeinitVulkan = "deinitVulkan";
-static constexpr const char *kRunOnVulkan = "runOnVulkan";
-static constexpr const char *kInitVulkan = "initVulkan";
-static constexpr const char *kSetBinaryShader = "setBinaryShader";
-static constexpr const char *kSetEntryPoint = "setEntryPoint";
-static constexpr const char *kSetNumWorkGroups = "setNumWorkGroups";
-static constexpr const char *kSPIRVBinary = "SPIRV_BIN";
-static constexpr const char *kSPIRVBlobAttrName = "spirv_blob";
-static constexpr const char *kSPIRVEntryPointAttrName = "spirv_entry_point";
-static constexpr const char *kSPIRVElementTypesAttrName = "spirv_element_types";
-static constexpr const char *kVulkanLaunch = "vulkanLaunch";
-
-namespace {
-
-/// A pass to convert vulkan launch call op into a sequence of Vulkan
-/// runtime calls in the following order:
-///
-/// * initVulkan           -- initializes vulkan runtime
-/// * bindMemRef           -- binds memref
-/// * setBinaryShader      -- sets the binary shader data
-/// * setEntryPoint        -- sets the entry point name
-/// * setNumWorkGroups     -- sets the number of a local workgroups
-/// * runOnVulkan          -- runs vulkan runtime
-/// * deinitVulkan         -- deinitializes vulkan runtime
-///
-class VulkanLaunchFuncToVulkanCallsPass
-    : public impl::ConvertVulkanLaunchFuncToVulkanCallsPassBase<
-          VulkanLaunchFuncToVulkanCallsPass> {
-private:
-  void initializeCachedTypes() {
-    llvmFloatType = Float32Type::get(&getContext());
-    llvmVoidType = LLVM::LLVMVoidType::get(&getContext());
-    llvmPointerType = LLVM::LLVMPointerType::get(&getContext());
-    llvmInt32Type = IntegerType::get(&getContext(), 32);
-    llvmInt64Type = IntegerType::get(&getContext(), 64);
-  }
-
-  Type getMemRefType(uint32_t rank, Type elemenType) {
-    // According to the MLIR doc memref argument is converted into a
-    // pointer-to-struct argument of type:
-    // template <typename Elem, size_t Rank>
-    // struct {
-    //   Elem *allocated;
-    //   Elem *aligned;
-    //   int64_t offset;
-    //   int64_t sizes[Rank]; // omitted when rank == 0
-    //   int64_t strides[Rank]; // omitted when rank == 0
-    // };
-    auto llvmArrayRankElementSizeType =
-        LLVM::LLVMArrayType::get(getInt64Type(), rank);
-
-    // Create a type
-    // `!llvm<"{ `element-type`*, `element-type`*, i64,
-    // [`rank` x i64], [`rank` x i64]}">`.
-    return LLVM::LLVMStructType::getLiteral(
-        &getContext(),
-        {llvmPointerType, llvmPointerType, getInt64Type(),
-         llvmArrayRankElementSizeType, llvmArrayRankElementSizeType});
-  }
-
-  Type getVoidType() { return llvmVoidType; }
-  Type getPointerType() { return llvmPointerType; }
-  Type getInt32Type() { return llvmInt32Type; }
-  Type getInt64Type() { return llvmInt64Type; }
-
-  /// Creates an LLVM global for the given `name`.
-  Value createEntryPointNameConstant(StringRef name, Location loc,
-                                     OpBuilder &builder);
-
-  /// Declares all needed runtime functions.
-  void declareVulkanFunctions(Location loc);
-
-  /// Checks whether the given LLVM::CallOp is a vulkan launch call op.
-  bool isVulkanLaunchCallOp(LLVM::CallOp callOp) {
-    return (callOp.getCallee() && *callOp.getCallee() == kVulkanLaunch &&
-            callOp.getNumOperands() >= kVulkanLaunchNumConfigOperands);
-  }
-
-  /// Checks whether the given LLVM::CallOp is a "ci_face" vulkan launch call
-  /// op.
-  bool isCInterfaceVulkanLaunchCallOp(LLVM::CallOp callOp) {
-    return (callOp.getCallee() &&
-            *callOp.getCallee() == kCInterfaceVulkanLaunch &&
-            callOp.getNumOperands() >= kVulkanLaunchNumConfigOperands);
-  }
-
-  /// Translates the given `vulkanLaunchCallOp` to the sequence of Vulkan
-  /// runtime calls.
-  void translateVulkanLaunchCall(LLVM::CallOp vulkanLaunchCallOp);
-
-  /// Creates call to `bindMemRef` for each memref operand.
-  void createBindMemRefCalls(LLVM::CallOp vulkanLaunchCallOp,
-                             Value vulkanRuntime);
-
-  /// Collects SPIRV attributes from the given `vulkanLaunchCallOp`.
-  void collectSPIRVAttributes(LLVM::CallOp vulkanLaunchCallOp);
-
-  /// Deduces a rank from the given 'launchCallArg`.
-  LogicalResult deduceMemRefRank(Value launchCallArg, uint32_t &rank);
-
-  /// Returns a string representation from the given `type`.
-  StringRef stringifyType(Type type) {
-    if (isa<Float32Type>(type))
-      return "Float";
-    if (isa<Float16Type>(type))
-      return "Half";
-    if (auto intType = dyn_cast<IntegerType>(type)) {
-      if (intType.getWidth() == 32)
-        return "Int32";
-      if (intType.getWidth() == 16)
-        return "Int16";
-      if (intType.getWidth() == 8)
-        return "Int8";
-    }
-
-    llvm_unreachable("unsupported type");
-  }
-
-public:
-  using Base::Base;
-
-  void runOnOperation() override;
-
-private:
-  Type llvmFloatType;
-  Type llvmVoidType;
-  Type llvmPointerType;
-  Type llvmInt32Type;
-  Type llvmInt64Type;
-
-  struct SPIRVAttributes {
-    StringAttr blob;
-    StringAttr entryPoint;
-    SmallVector<Type> elementTypes;
-  };
-
-  // TODO: Use an associative array to support multiple vulkan launch calls.
-  SPIRVAttributes spirvAttributes;
-  /// The number of vulkan launch configuration operands, placed at the leading
-  /// positions of the operand list.
-  static constexpr unsigned kVulkanLaunchNumConfigOperands = 3;
-};
-
-} // namespace
-
-void VulkanLaunchFuncToVulkanCallsPass::runOnOperation() {
-  initializeCachedTypes();
-
-  // Collect SPIR-V attributes such as `spirv_blob` and
-  // `spirv_entry_point_name`.
-  getOperation().walk([this](LLVM::CallOp op) {
-    if (isVulkanLaunchCallOp(op))
-      collectSPIRVAttributes(op);
-  });
-
-  // Convert vulkan launch call op into a sequence of Vulkan runtime calls.
-  getOperation().walk([this](LLVM::CallOp op) {
-    if (isCInterfaceVulkanLaunchCallOp(op))
-      translateVulkanLaunchCall(op);
-  });
-}
-
-void VulkanLaunchFuncToVulkanCallsPass::collectSPIRVAttributes(
-    LLVM::CallOp vulkanLaunchCallOp) {
-  // Check that `kSPIRVBinary` and `kSPIRVEntryPoint` are present in attributes
-  // for the given vulkan launch call.
-  auto spirvBlobAttr =
-      vulkanLaunchCallOp->getAttrOfType<StringAttr>(kSPIRVBlobAttrName);
-  if (!spirvBlobAttr) {
-    vulkanLaunchCallOp.emitError()
-        << "missing " << kSPIRVBlobAttrName << " attribute";
-    return signalPassFailure();
-  }
-
-  auto spirvEntryPointNameAttr =
-      vulkanLaunchCallOp->getAttrOfType<StringAttr>(kSPIRVEntryPointAttrName);
-  if (!spirvEntryPointNameAttr) {
-    vulkanLaunchCallOp.emitError()
-        << "missing " << kSPIRVEntryPointAttrName << " attribute";
-    return signalPassFailure();
-  }
-
-  auto spirvElementTypesAttr =
-      vulkanLaunchCallOp->getAttrOfType<ArrayAttr>(kSPIRVElementTypesAttrName);
-  if (!spirvElementTypesAttr) {
-    vulkanLaunchCallOp.emitError()
-        << "missing " << kSPIRVElementTypesAttrName << " attribute";
-    return signalPassFailure();
-  }
-  if (llvm::any_of(spirvElementTypesAttr,
-                   [](Attribute attr) { return !isa<TypeAttr>(attr); })) {
-    vulkanLaunchCallOp.emitError()
-        << "expected " << spirvElementTypesAttr << " to be an array of types";
-    return signalPassFailure();
-  }
-
-  spirvAttributes.blob = spirvBlobAttr;
-  spirvAttributes.entryPoint = spirvEntryPointNameAttr;
-  spirvAttributes.elementTypes =
-      llvm::to_vector(spirvElementTypesAttr.getAsValueRange<mlir::TypeAttr>());
-}
-
-void VulkanLaunchFuncToVulkanCallsPass::createBindMemRefCalls(
-    LLVM::CallOp cInterfaceVulkanLaunchCallOp, Value vulkanRuntime) {
-  if (cInterfaceVulkanLaunchCallOp.getNumOperands() ==
-      kVulkanLaunchNumConfigOperands)
-    return;
-  OpBuilder builder(cInterfaceVulkanLaunchCallOp);
-  Location loc = cInterfaceVulkanLaunchCallOp.getLoc();
-
-  // Create LLVM constant for the descriptor set index.
-  // Bind all memrefs to the `0` descriptor set, the same way as `GPUToSPIRV`
-  // pass does.
-  Value descriptorSet =
-      builder.create<LLVM::ConstantOp>(loc, getInt32Type(), 0);
-
-  for (auto [index, ptrToMemRefDescriptor] :
-       llvm::enumerate(cInterfaceVulkanLaunchCallOp.getOperands().drop_front(
-           kVulkanLaunchNumConfigOperands))) {
-    // Create LLVM constant for the descriptor binding index.
-    Value descriptorBinding =
-        builder.create<LLVM::ConstantOp>(loc, getInt32Type(), index);
-
-    if (index >= spirvAttributes.elementTypes.size()) {
-      cInterfaceVulkanLaunchCallOp.emitError()
-          << kSPIRVElementTypesAttrName << " missing element type for "
-          << ptrToMemRefDescriptor;
-      return signalPassFailure();
-    }
-
-    uint32_t rank = 0;
-    Type type = spirvAttributes.elementTypes[index];
-    if (failed(deduceMemRefRank(ptrToMemRefDescriptor, rank))) {
-      cInterfaceVulkanLaunchCallOp.emitError()
-          << "invalid memref descriptor " << ptrToMemRefDescriptor.getType();
-      return signalPassFailure();
-    }
-
-    auto symbolName =
-        llvm::formatv("bindMemRef{0}D{1}", rank, stringifyType(type)).str();
-    // Create call to `bindMemRef`.
-    builder.create<LLVM::CallOp>(
-        loc, TypeRange(), StringRef(symbolName.data(), symbolName.size()),
-        ValueRange{vulkanRuntime, descriptorSet, descriptorBinding,
-                   ptrToMemRefDescriptor});
-  }
-}
-
-LogicalResult
-VulkanLaunchFuncToVulkanCallsPass::deduceMemRefRank(Value launchCallArg,
-                                                    uint32_t &rank) {
-  // Deduce the rank from the type used to allocate the lowered MemRef.
-  auto alloca = launchCallArg.getDefiningOp<LLVM::AllocaOp>();
-  if (!alloca)
-    return failure();
-
-  std::optional<Type> elementType = alloca.getElemType();
-  assert(elementType && "expected to work with opaque pointers");
-  auto llvmDescriptorTy = dyn_cast<LLVM::LLVMStructType>(*elementType);
-  // template <typename Elem, size_t Rank>
-  // struct {
-  //   Elem *allocated;
-  //   Elem *aligned;
-  //   int64_t offset;
-  //   int64_t sizes[Rank]; // omitted when rank == 0
-  //   int64_t strides[Rank]; // omitted when rank == 0
-  // };
-  if (!llvmDescriptorTy)
-    return failure();
-
-  if (llvmDescriptorTy.getBody().size() == 3) {
-    rank = 0;
-    return success();
-  }
-  rank =
-      cast<LLVM::LLVMArrayType>(llvmDescriptorTy.getBody()[3]).getNumElements();
-  return success();
-}
-
-void VulkanLaunchFuncToVulkanCallsPass::declareVulkanFunctions(Location loc) {
-  ModuleOp module = getOperation();
-  auto builder = OpBuilder::atBlockEnd(module.getBody());
-
-  if (!module.lookupSymbol(kSetEntryPoint)) {
-    builder.create<LLVM::LLVMFuncOp>(
-        loc, kSetEntryPoint,
-        LLVM::LLVMFunctionType::get(getVoidType(),
-                                    {getPointerType(), getPointerType()}));
-  }
-
-  if (!module.lookupSymbol(kSetNumWorkGroups)) {
-    builder.create<LLVM::LLVMFuncOp>(
-        loc, kSetNumWorkGroups,
-        LLVM::LLVMFunctionType::get(getVoidType(),
-                                    {getPointerType(), getInt64Type(),
-                                     getInt64Type(), getInt64Type()}));
-  }
-
-  if (!module.lookupSymbol(kSetBinaryShader)) {
-    builder.create<LLVM::LLVMFuncOp>(
-        loc, kSetBinaryShader,
-        LLVM::LLVMFunctionType::get(
-            getVoidType(),
-            {getPointerType(), getPointerType(), getInt32Type()}));
-  }
-
-  if (!module.lookupSymbol(kRunOnVulkan)) {
-    builder.create<LLVM::LLVMFuncOp>(
-        loc, kRunOnVulkan,
-        LLVM::LLVMFunctionType::get(getVoidType(), {getPointerType()}));
-  }
-
-  for (unsigned i = 1; i <= 3; i++) {
-    SmallVector<Type, 5> types{
-        Float32Type::get(&getContext()), IntegerType::get(&getContext(), 32),
-        IntegerType::get(&getContext(), 16), IntegerType::get(&getContext(), 8),
-        Float16Type::get(&getContext())};
-    for (auto type : types) {
-      std::string fnName = "bindMemRef" + std::to_string(i) + "D" +
-                           std::string(stringifyType(type));
-      if (isa<Float16Type>(type))
-        type = IntegerType::get(&getContext(), 16);
-      if (!module.lookupSymbol(fnName)) {
-        auto fnType = LLVM::LLVMFunctionType::get(
-            getVoidType(),
-            {llvmPointerType, getInt32Type(), getInt32Type(), llvmPointerType},
-            /*isVarArg=*/false);
-        builder.create<LLVM::LLVMFuncOp>(loc, fnName, fnType);
-      }
-    }
-  }
-
-  if (!module.lookupSymbol(kInitVulkan)) {
-    builder.create<LLVM::LLVMFuncOp>(
-        loc, kInitVulkan, LLVM::LLVMFunctionType::get(getPointerType(), {}));
-  }
-
-  if (!module.lookupSymbol(kDeinitVulkan)) {
-    builder.create<LLVM::LLVMFuncOp>(
-        loc, kDeinitVulkan,
-        LLVM::LLVMFunctionType::get(getVoidType(), {getPointerType()}));
-  }
-}
-
-Value VulkanLaunchFuncToVulkanCallsPass::createEntryPointNameConstant(
-    StringRef name, Location loc, OpBuilder &builder) {
-  SmallString<16> shaderName(name.begin(), name.end());
-  // Append `\0` to follow C style string given that LLVM::createGlobalString()
-  // won't handle this directly for us.
-  shaderName.push_back('\0');
-
-  std::string entryPointGlobalName = (name + "_spv_entry_point_name").str();
-  return LLVM::createGlobalString(loc, builder, entryPointGlobalName,
-                                  shaderName, LLVM::Linkage::Internal);
-}
-
-void VulkanLaunchFuncToVulkanCallsPass::translateVulkanLaunchCall(
-    LLVM::CallOp cInterfaceVulkanLaunchCallOp) {
-  OpBuilder builder(cInterfaceVulkanLaunchCallOp);
-  Location loc = cInterfaceVulkanLaunchCallOp.getLoc();
-  // Create call to `initVulkan`.
-  auto initVulkanCall = builder.create<LLVM::CallOp>(
-      loc, TypeRange{getPointerType()}, kInitVulkan);
-  // The result of `initVulkan` function is a pointer to Vulkan runtime, we
-  // need to pass that pointer to each Vulkan runtime call.
-  auto vulkanRuntime = initVulkanCall.getResult();
-
-  // Create LLVM global with SPIR-V binary data, so we can pass a pointer with
-  // that data to runtime call.
-  Value ptrToSPIRVBinary = LLVM::createGlobalString(
-      loc, builder, kSPIRVBinary, spirvAttributes.blob.getValue(),
-      LLVM::Linkage::Internal);
-
-  // Create LLVM constant for the size of SPIR-V binary shader.
-  Value binarySize = builder.create<LLVM::ConstantOp>(
-      loc, getInt32Type(), spirvAttributes.blob.getValue().size());
-
-  // Create call to `bindMemRef` for each memref operand.
-  createBindMemRefCalls(cInterfaceVulkanLaunchCallOp, vulkanRuntime);
-
-  // Create call to `setBinaryShader` runtime function with the given pointer to
-  // SPIR-V binary and binary size.
-  builder.create<LLVM::CallOp>(
-      loc, TypeRange(), kSetBinaryShader,
-      ValueRange{vulkanRuntime, ptrToSPIRVBinary, binarySize});
-  // Create LLVM global with entry point name.
-  Value entryPointName = createEntryPointNameConstant(
-      spirvAttributes.entryPoint.getValue(), loc, builder);
-  // Create call to `setEntryPoint` runtime function with the given pointer to
-  // entry point name.
-  builder.create<LLVM::CallOp>(loc, TypeRange(), kSetEntryPoint,
-                               ValueRange{vulkanRuntime, entryPointName});
-
-  // Create number of local workgroup for each dimension.
-  builder.create<LLVM::CallOp>(
-      loc, TypeRange(), kSetNumWorkGroups,
-      ValueRange{vulkanRuntime, cInterfaceVulkanLaunchCallOp.getOperand(0),
-                 cInterfaceVulkanLaunchCallOp.getOperand(1),
-                 cInterfaceVulkanLaunchCallOp.getOperand(2)});
-
-  // Create call to `runOnVulkan` runtime function.
-  builder.create<LLVM::CallOp>(loc, TypeRange(), kRunOnVulkan,
-                               ValueRange{vulkanRuntime});
-
-  // Create call to 'deinitVulkan' runtime function.
-  builder.create<LLVM::CallOp>(loc, TypeRange(), kDeinitVulkan,
-                               ValueRange{vulkanRuntime});
-
-  // Declare runtime functions.
-  declareVulkanFunctions(loc);
-
-  cInterfaceVulkanLaunchCallOp.erase();
-}
diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp
index 12fa94b9e62fb..aaf743ab28003 100644
--- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp
+++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp
@@ -1599,7 +1599,7 @@ class MaterializeResizeBroadcast : public OpConversionPattern<tosa::ResizeOp> {
       reassociationMap.push_back({});
     reassociationMap.back().push_back(builder.getAffineDimExpr(3));
 
-    llvm::SmallVector<int64_t> collapseShape{batch};
+    llvm::SmallVector<int64_t> collapseShape = {batch};
     if (inputH != 1)
       collapseShape.push_back(outputH);
     if (inputW != 1)
diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp
index 638b99e7602d6..d4634127c5a84 100644
--- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp
+++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp
@@ -727,12 +727,12 @@ class FullyConnectedConverter
 
     SmallVector<Value> filteredDims = condenseValues(dynDims);
 
-    SmallVector<int64_t> permutation{1, 0};
+    SmallVector<int64_t> permutation = {1, 0};
     auto permutationAttr = rewriter.getI64TensorAttr(permutation);
     Value permutationValue =
         rewriter.create<arith::ConstantOp>(loc, permutationAttr);
 
-    SmallVector<int64_t> newWeightShape{weightShape[1], weightShape[0]};
+    SmallVector<int64_t> newWeightShape = {weightShape[1], weightShape[0]};
     Type newWeightTy =
         RankedTensorType::get(newWeightShape, weightTy.getElementType());
 
diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp
index 2c4c5ada9815d..e3a81bd20212d 100644
--- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp
+++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp
@@ -69,12 +69,11 @@ void ConvertVectorToLLVMPass::runOnOperation() {
     populateVectorToVectorCanonicalizationPatterns(patterns);
     populateVectorBitCastLoweringPatterns(patterns);
     populateVectorBroadcastLoweringPatterns(patterns);
-    populateVectorContractLoweringPatterns(patterns, VectorTransformsOptions());
+    populateVectorContractLoweringPatterns(patterns, vectorTransformsOptions);
     populateVectorMaskOpLoweringPatterns(patterns);
     populateVectorShapeCastLoweringPatterns(patterns);
     populateVectorInterleaveLoweringPatterns(patterns);
-    populateVectorTransposeLoweringPatterns(patterns,
-                                            VectorTransformsOptions());
+    populateVectorTransposeLoweringPatterns(patterns, vectorTransformsOptions);
     // Vector transfer ops with rank > 1 should be lowered with VectorToSCF.
     populateVectorTransferLoweringPatterns(patterns, /*maxTransferRank=*/1);
     populateVectorMaskMaterializationPatterns(patterns,
diff --git a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
index d3229d2e91296..dc4ee4e926bb4 100644
--- a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
+++ b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
@@ -182,7 +182,7 @@ struct TransferReadLowering : public OpRewritePattern<vector::TransferReadOp> {
           readOp, "Unsupported data type for tranposition");
 
     // If load is transposed, get the base shape for the tensor descriptor.
-    SmallVector<int64_t> descShape{vecTy.getShape()};
+    SmallVector<int64_t> descShape(vecTy.getShape());
     if (isTransposeLoad)
       std::reverse(descShape.begin(), descShape.end());
     auto descType = xegpu::TensorDescType::get(
diff --git a/mlir/lib/Dialect/ArmNeon/Transforms/LowerContractionToSMMLAPattern.cpp b/mlir/lib/Dialect/ArmNeon/Transforms/LowerContractionToSMMLAPattern.cpp
index 2c0c84d055f59..2a1271dfd6bdf 100644
--- a/mlir/lib/Dialect/ArmNeon/Transforms/LowerContractionToSMMLAPattern.cpp
+++ b/mlir/lib/Dialect/ArmNeon/Transforms/LowerContractionToSMMLAPattern.cpp
@@ -126,8 +126,8 @@ class LowerContractionToSMMLAPattern
         loc, op.getResultType(), rewriter.getZeroAttr(op.getResultType()));
 
     SmallVector<int64_t> unrolledSize = *op.getShapeForUnroll();
-    SmallVector<int64_t> smmlaShape{2, 8};
-    SmallVector<int64_t> loopOrder{0, 1};
+    SmallVector<int64_t> smmlaShape = {2, 8};
+    SmallVector<int64_t> loopOrder = {0, 1};
     if (unrolledSize.size() == 3) {
       smmlaShape.insert(smmlaShape.begin(), isVecmat ? 1 : 2);
       loopOrder.push_back(2);
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/GPUHeuristics.cpp b/mlir/lib/Dialect/Linalg/TransformOps/GPUHeuristics.cpp
index e6162ad97d784..0657a87d1d1ac 100644
--- a/mlir/lib/Dialect/Linalg/TransformOps/GPUHeuristics.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/GPUHeuristics.cpp
@@ -222,7 +222,7 @@ transform::gpu::CopyMappingInfo::inferNumThreadsImpl(
   // Scale the most minor size to account for the chosen vector size and
   // maximize the number of threads without exceeding the total number of
   // threads.
-  SmallVector<int64_t> scaledSizes{sizes};
+  SmallVector<int64_t> scaledSizes(sizes);
   scaledSizes.back() /= desiredVectorSize;
   if (scaledSizes.back() > totalNumThreads) {
     LDBG("--Too few threads given the required vector size -> FAIL");
diff --git a/mlir/lib/Dialect/Linalg/Transforms/BlockPackMatmul.cpp b/mlir/lib/Dialect/Linalg/Transforms/BlockPackMatmul.cpp
index 57344f986480d..ed1685a9cb9e6 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/BlockPackMatmul.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/BlockPackMatmul.cpp
@@ -55,7 +55,7 @@ static bool validateFullTilesOnDims(linalg::LinalgOp linalgOp,
 
   // Skip the batch dimension if present.
   // Offset all dimensions accordingly.
-  SmallVector<int64_t, 3> offsetDims{dims};
+  SmallVector<int64_t, 3> offsetDims(dims);
   for (size_t i = 0; i < offsetDims.size(); i++)
     offsetDims[i] += batchDimsOffset;
 
@@ -111,10 +111,10 @@ transposePackedMatmul(RewriterBase &rewriter, linalg::LinalgOp linalgOp,
 
   // Transpose only the dimensions that need that to conform to the provided
   // transpotion settings.
-  SmallVector<int64_t> innerPerm{0, 1};
+  SmallVector<int64_t> innerPerm = {0, 1};
   if (isInnerTransposed != transposeInnerBlocks)
     innerPerm = {1, 0};
-  SmallVector<int64_t> outerPerm{0, 1};
+  SmallVector<int64_t> outerPerm = {0, 1};
   if (isOuterTransposed != transposeOuterBlocks)
     outerPerm = {1, 0};
 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/TransposeConv2D.cpp b/mlir/lib/Dialect/Linalg/Transforms/TransposeConv2D.cpp
index 1294043bf3837..bdaf1f8666b92 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/TransposeConv2D.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/TransposeConv2D.cpp
@@ -52,7 +52,7 @@ FailureOr<Operation *> transposeConv2DHelper(RewriterBase &rewriter,
                                              FHWCConvOp op) {
   // Construct a permutation of the filter tensor dimensions. For a 2D
   // convolution this will be known statically as [1, 2, 3, 0].
-  SmallVector<int64_t> filterPerm({1, 2, 3, 0});
+  SmallVector<int64_t> filterPerm = {1, 2, 3, 0};
 
   // Create the type for the transposed filter tensor.
   auto filter = op->getOperand(1);
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index 863f2280e46ce..299bbc226dec8 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -86,8 +86,8 @@ extractConvInputSlices(RewriterBase &rewriter, Location loc, Value input,
   if (isSingleChanneled) {
     // Extract input slice of size {wSizeStep} @ [w + kw] for non-channeled
     // convolution.
-    SmallVector<int64_t> sizes{wSizeStep};
-    SmallVector<int64_t> strides{1};
+    SmallVector<int64_t> sizes = {wSizeStep};
+    SmallVector<int64_t> strides = {1};
     for (int64_t kw = 0; kw < kwSize; ++kw) {
       for (int64_t w = 0; w < wSize; w += wSizeStep) {
         result.push_back(rewriter.create<vector::ExtractStridedSliceOp>(
@@ -97,8 +97,8 @@ extractConvInputSlices(RewriterBase &rewriter, Location loc, Value input,
   } else {
     // Extract lhs slice of size {n, wSizeStep, c} @ [0, sw * w + dw * kw, 0]
     // for channeled convolution.
-    SmallVector<int64_t> sizes{nSize, wSizeStep, cSize};
-    SmallVector<int64_t> strides{1, 1, 1};
+    SmallVector<int64_t> sizes = {nSize, wSizeStep, cSize};
+    SmallVector<int64_t> strides = {1, 1, 1};
     for (int64_t kw = 0; kw < kwSize; ++kw) {
       for (int64_t w = 0; w < wSize; w += wSizeStep) {
         result.push_back(rewriter.create<vector::ExtractStridedSliceOp>(
@@ -135,8 +135,8 @@ extractConvResultSlices(RewriterBase &rewriter, Location loc, Value res,
   SmallVector<Value> result;
   if (isSingleChanneled) {
     // Extract res slice: {wSizeStep} @ [w] for non-channeled convolution.
-    SmallVector<int64_t> sizes{wSizeStep};
-    SmallVector<int64_t> strides{1};
+    SmallVector<int64_t> sizes = {wSizeStep};
+    SmallVector<int64_t> strides = {1};
     for (int64_t w = 0; w < wSize; w += wSizeStep) {
       result.push_back(rewriter.create<vector::ExtractStridedSliceOp>(
           loc, res, /*offsets=*/ArrayRef<int64_t>{w}, sizes, strides));
@@ -144,8 +144,8 @@ extractConvResultSlices(RewriterBase &rewriter, Location loc, Value res,
   } else {
     // Extract res slice: {n, wSizeStep, f} @ [0, w, 0] for channeled
     // convolution.
-    SmallVector<int64_t> sizes{nSize, wSizeStep, fSize};
-    SmallVector<int64_t> strides{1, 1, 1};
+    SmallVector<int64_t> sizes = {nSize, wSizeStep, fSize};
+    SmallVector<int64_t> strides = {1, 1, 1};
     for (int64_t w = 0; w < wSize; w += wSizeStep) {
       result.push_back(rewriter.create<vector::ExtractStridedSliceOp>(
           loc, res, /*offsets=*/ArrayRef<int64_t>{0, w, 0}, sizes, strides));
@@ -163,7 +163,7 @@ static Value insertConvResultSlices(RewriterBase &rewriter, Location loc,
   if (isSingleChanneled) {
     // Write back res slice: {wSizeStep} @ [w] for non-channeled convolution.
     // This does not depend on kw.
-    SmallVector<int64_t> strides{1};
+    SmallVector<int64_t> strides = {1};
     for (int64_t w = 0; w < wSize; w += wSizeStep) {
       res = rewriter.create<vector::InsertStridedSliceOp>(
           loc, resVals[w], res, /*offsets=*/ArrayRef<int64_t>{w}, strides);
@@ -171,7 +171,7 @@ static Value insertConvResultSlices(RewriterBase &rewriter, Location loc,
   } else {
     // Write back res slice: {n, wSizeStep, f} @ [0, w, 0] for channeled
     // convolution. This does not depend on kw.
-    SmallVector<int64_t> strides{1, 1, 1};
+    SmallVector<int64_t> strides = {1, 1, 1};
     for (int64_t w = 0; w < wSize; w += wSizeStep) {
       res = rewriter.create<vector::InsertStridedSliceOp>(
           loc, resVals[w], res, /*offsets=*/ArrayRef<int64_t>{0, w, 0},
@@ -3505,8 +3505,8 @@ struct Conv1DGenerator
     //===------------------------------------------------------------------===//
     // Unroll along kw and read slices of lhs and rhs.
     SmallVector<Value> lhsVals, rhsVals, resVals;
-    auto inOutSliceSizes = SmallVector<int64_t>{nSize, wSizeStep, cSize};
-    auto inOutStrides = SmallVector<int64_t>{1, 1, 1};
+    SmallVector<int64_t> inOutSliceSizes = {nSize, wSizeStep, cSize};
+    SmallVector<int64_t> inOutStrides = {1, 1, 1};
 
     // Extract lhs slice of size {n, wSizeStep, c}
     //   @ [0, sw * w + dw * kw, 0].
@@ -3538,8 +3538,7 @@ struct Conv1DGenerator
 
     // Note - the scalable flags are ignored as flattening combined with
     // scalable vectorization is not supported.
-    auto inOutFlattenSliceSizes =
-        SmallVector<int64_t>{nSize, wSizeStep * cSize};
+    SmallVector<int64_t> inOutFlattenSliceSizes = {nSize, wSizeStep * cSize};
     auto lhsTypeAfterFlattening =
         VectorType::get(inOutFlattenSliceSizes, lhsEltType);
     auto resTypeAfterFlattening =
diff --git a/mlir/lib/Dialect/NVGPU/TransformOps/NVGPUTransformOps.cpp b/mlir/lib/Dialect/NVGPU/TransformOps/NVGPUTransformOps.cpp
index 3c508ed6e324b..556922a64b093 100644
--- a/mlir/lib/Dialect/NVGPU/TransformOps/NVGPUTransformOps.cpp
+++ b/mlir/lib/Dialect/NVGPU/TransformOps/NVGPUTransformOps.cpp
@@ -740,9 +740,9 @@ static std::tuple<SmallVector<int64_t>, SmallVector<int64_t>,
                   SmallVector<int64_t>>
 makeVectorShapes(ArrayRef<int64_t> lhs, ArrayRef<int64_t> rhs,
                  ArrayRef<int64_t> res) {
-  SmallVector<int64_t> vlhs{lhs};
-  SmallVector<int64_t> vrhs{rhs};
-  SmallVector<int64_t> vres{res};
+  SmallVector<int64_t> vlhs(lhs);
+  SmallVector<int64_t> vrhs(rhs);
+  SmallVector<int64_t> vres(res);
   return std::make_tuple(vlhs, vrhs, vres);
 }
 
diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorTransfer.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorTransfer.cpp
index 314dc44134e04..c5d29c09b39b3 100644
--- a/mlir/lib/Dialect/Vector/Transforms/LowerVectorTransfer.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorTransfer.cpp
@@ -492,60 +492,6 @@ struct TransferReadToVectorLoadLowering
   std::optional<unsigned> maxTransferRank;
 };
 
-/// Replace a 0-d vector.load with a memref.load + vector.broadcast.
-// TODO: we shouldn't cross the vector/scalar domains just for this
-// but atm we lack the infra to avoid it. Possible solutions include:
-// - go directly to LLVM + bitcast
-// - introduce a bitcast op and likely a new pointer dialect
-// - let memref.load/store additionally support the 0-d vector case
-// There are still deeper data layout issues lingering even in this
-// trivial case (for architectures for which this matters).
-struct VectorLoadToMemrefLoadLowering
-    : public OpRewritePattern<vector::LoadOp> {
-  using OpRewritePattern::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(vector::LoadOp loadOp,
-                                PatternRewriter &rewriter) const override {
-    auto vecType = loadOp.getVectorType();
-    if (vecType.getNumElements() != 1)
-      return rewriter.notifyMatchFailure(loadOp, "not a single element vector");
-
-    auto memrefLoad = rewriter.create<memref::LoadOp>(
-        loadOp.getLoc(), loadOp.getBase(), loadOp.getIndices());
-    rewriter.replaceOpWithNewOp<vector::BroadcastOp>(loadOp, vecType,
-                                                     memrefLoad);
-    return success();
-  }
-};
-
-/// Replace a 0-d vector.store with a vector.extractelement + memref.store.
-struct VectorStoreToMemrefStoreLowering
-    : public OpRewritePattern<vector::StoreOp> {
-  using OpRewritePattern::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(vector::StoreOp storeOp,
-                                PatternRewriter &rewriter) const override {
-    auto vecType = storeOp.getVectorType();
-    if (vecType.getNumElements() != 1)
-      return rewriter.notifyMatchFailure(storeOp, "not single element vector");
-
-    Value extracted;
-    if (vecType.getRank() == 0) {
-      // TODO: Unifiy once ExtractOp supports 0-d vectors.
-      extracted = rewriter.create<vector::ExtractElementOp>(
-          storeOp.getLoc(), storeOp.getValueToStore());
-    } else {
-      SmallVector<int64_t> indices(vecType.getRank(), 0);
-      extracted = rewriter.create<vector::ExtractOp>(
-          storeOp.getLoc(), storeOp.getValueToStore(), indices);
-    }
-
-    rewriter.replaceOpWithNewOp<memref::StoreOp>(
-        storeOp, extracted, storeOp.getBase(), storeOp.getIndices());
-    return success();
-  }
-};
-
 /// Progressive lowering of transfer_write. This pattern supports lowering of
 /// `vector.transfer_write` to `vector.store` if all of the following hold:
 /// - Stride of most minor memref dimension must be 1.
@@ -645,7 +591,4 @@ void mlir::vector::populateVectorTransferLoweringPatterns(
   patterns.add<TransferReadToVectorLoadLowering,
                TransferWriteToVectorStoreLowering>(patterns.getContext(),
                                                    maxTransferRank, benefit);
-  patterns
-      .add<VectorLoadToMemrefLoadLowering, VectorStoreToMemrefStoreLowering>(
-          patterns.getContext(), benefit);
 }
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorDropLeadUnitDim.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorDropLeadUnitDim.cpp
index 42ac717b44c4b..3035c419a1b56 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorDropLeadUnitDim.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorDropLeadUnitDim.cpp
@@ -557,7 +557,7 @@ struct CastAwayConstantMaskLeadingOneDim
     int64_t flatLeadingSize =
         std::accumulate(dimSizes.begin(), dimSizes.begin() + dropDim + 1,
                         static_cast<int64_t>(1), std::multiplies<int64_t>());
-    SmallVector<int64_t> newDimSizes({flatLeadingSize});
+    SmallVector<int64_t> newDimSizes = {flatLeadingSize};
     newDimSizes.append(dimSizes.begin() + dropDim + 1, dimSizes.end());
 
     auto newMask = rewriter.create<vector::ConstantMaskOp>(
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
index 84c1deaebcd00..d9be8d0e578ae 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
@@ -930,8 +930,8 @@ struct BreakDownVectorBitCast : public OpRewritePattern<vector::BitCastOp> {
         loc, elemType, rewriter.getZeroAttr(elemType));
     Value res = rewriter.create<SplatOp>(loc, castDstType, zero);
 
-    SmallVector<int64_t> sliceShape{castDstLastDim};
-    SmallVector<int64_t> strides{1};
+    SmallVector<int64_t> sliceShape = {castDstLastDim};
+    SmallVector<int64_t> strides = {1};
     VectorType newCastDstType =
         VectorType::get(SmallVector<int64_t>{castDstLastDim / shrinkRatio},
                         castDstType.getElementType());
diff --git a/mlir/lib/Dialect/X86Vector/Transforms/AVXTranspose.cpp b/mlir/lib/Dialect/X86Vector/Transforms/AVXTranspose.cpp
index edd939eda7c59..3fc05c8cb8707 100644
--- a/mlir/lib/Dialect/X86Vector/Transforms/AVXTranspose.cpp
+++ b/mlir/lib/Dialect/X86Vector/Transforms/AVXTranspose.cpp
@@ -66,8 +66,8 @@ Value mlir::x86vector::avx2::intrin::mm256ShufflePs(ImplicitLocOpBuilder &b,
                                                     uint8_t mask) {
   uint8_t b01, b23, b45, b67;
   MaskHelper::extractShuffle(mask, b01, b23, b45, b67);
-  SmallVector<int64_t> shuffleMask{b01,     b23,     b45 + 8,     b67 + 8,
-                                   b01 + 4, b23 + 4, b45 + 8 + 4, b67 + 8 + 4};
+  SmallVector<int64_t> shuffleMask = {
+      b01, b23, b45 + 8, b67 + 8, b01 + 4, b23 + 4, b45 + 8 + 4, b67 + 8 + 4};
   return b.create<vector::ShuffleOp>(v1, v2, shuffleMask);
 }
 
diff --git a/mlir/lib/ExecutionEngine/CMakeLists.txt b/mlir/lib/ExecutionEngine/CMakeLists.txt
index cf44a02cf5cb9..dd2ac75b88798 100644
--- a/mlir/lib/ExecutionEngine/CMakeLists.txt
+++ b/mlir/lib/ExecutionEngine/CMakeLists.txt
@@ -16,6 +16,9 @@ set(LLVM_OPTIONAL_SOURCES
   JitRunner.cpp
   SpirvCpuRuntimeWrappers.cpp
   SyclRuntimeWrappers.cpp
+  VulkanRuntimeWrappers.cpp
+  VulkanRuntime.cpp
+  VulkanRuntime.h
   )
 
 # Use a separate library for OptUtils, to avoid pulling in the entire JIT and
@@ -88,8 +91,9 @@ add_mlir_library(MLIRExecutionEngine
   IPO
   Passes
   ${LLVM_JIT_LISTENER_LIB}
+  )
 
-  LINK_LIBS PUBLIC
+mlir_target_link_libraries(MLIRExecutionEngine PUBLIC
   MLIRBuiltinToLLVMIRTranslation
   MLIRExecutionEngineUtils
   MLIRLLVMDialect
@@ -136,8 +140,10 @@ add_mlir_library(MLIRJitRunner
   JITLink
 
   LINK_LIBS PUBLIC
-  ${dialect_libs}
   MLIRExecutionEngine
+)
+mlir_target_link_libraries(MLIRJitRunner PUBLIC
+  ${dialect_libs}
   MLIRFuncDialect
   MLIRFuncToLLVM
   MLIRIR
@@ -415,4 +421,46 @@ if(LLVM_ENABLE_PIC)
       PRIVATE
       mlir_spirv_cpu_runtime_EXPORTS)
   endif()
+
+  if (MLIR_ENABLE_VULKAN_RUNNER)
+    find_package(Vulkan)
+
+    # If Vulkan is not found, try the path specified by VULKAN_SDK.
+    if (NOT Vulkan_FOUND)
+      if ("$ENV{VULKAN_SDK}" STREQUAL "")
+        message(FATAL_ERROR "Vulkan not found through CMake; please provide "
+                            "VULKAN_SDK path as an environment variable")
+      endif()
+
+      find_library(Vulkan_LIBRARY vulkan HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED)
+      if (Vulkan_LIBRARY)
+        set(Vulkan_FOUND ON)
+        set(Vulkan_INCLUDE_DIR "$ENV{VULKAN_SDK}/include")
+        message(STATUS "Found Vulkan: " ${Vulkan_LIBRARY})
+      endif()
+    endif()
+
+    if (NOT Vulkan_FOUND)
+      message(FATAL_ERROR "Cannot find Vulkan library")
+    endif()
+
+    add_llvm_library(mlir_vulkan_runtime SHARED
+      VulkanRuntimeWrappers.cpp
+      VulkanRuntime.cpp
+    )
+
+    target_include_directories(mlir_vulkan_runtime
+      PUBLIC
+      ${Vulkan_INCLUDE_DIR}
+    )
+
+    # *IMPORTANT*: This library cannot depend on LLVM libraries. Otherwise,
+    # it may cause LLVM version conflicts when used together with other shared
+    # libraries that depend on LLVM. Notably, Mesa, which implements Vulkan
+    # drivers on Linux, depends on the system libLLVM.so.
+    target_link_libraries(mlir_vulkan_runtime
+      PUBLIC
+      ${Vulkan_LIBRARY}
+    )
+  endif()
 endif()
diff --git a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt
index 15024b2475b91..6ef1529343453 100644
--- a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt
+++ b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt
@@ -11,8 +11,8 @@ add_mlir_library(MLIRSparseTensorRuntime
   Storage.cpp
 
   EXCLUDE_FROM_LIBMLIR
-
-  LINK_LIBS PUBLIC
+  )
+mlir_target_link_libraries(MLIRSparseTensorRuntime PUBLIC
   MLIRSparseTensorEnums
   mlir_float16_utils
   )
diff --git a/mlir/tools/mlir-vulkan-runner/VulkanRuntime.cpp b/mlir/lib/ExecutionEngine/VulkanRuntime.cpp
similarity index 100%
rename from mlir/tools/mlir-vulkan-runner/VulkanRuntime.cpp
rename to mlir/lib/ExecutionEngine/VulkanRuntime.cpp
diff --git a/mlir/tools/mlir-vulkan-runner/VulkanRuntime.h b/mlir/lib/ExecutionEngine/VulkanRuntime.h
similarity index 100%
rename from mlir/tools/mlir-vulkan-runner/VulkanRuntime.h
rename to mlir/lib/ExecutionEngine/VulkanRuntime.h
diff --git a/mlir/tools/mlir-vulkan-runner/vulkan-runtime-wrappers.cpp b/mlir/lib/ExecutionEngine/VulkanRuntimeWrappers.cpp
similarity index 68%
rename from mlir/tools/mlir-vulkan-runner/vulkan-runtime-wrappers.cpp
rename to mlir/lib/ExecutionEngine/VulkanRuntimeWrappers.cpp
index ffd1114cec6aa..c414fe53b2f3c 100644
--- a/mlir/tools/mlir-vulkan-runner/vulkan-runtime-wrappers.cpp
+++ b/mlir/lib/ExecutionEngine/VulkanRuntimeWrappers.cpp
@@ -1,4 +1,4 @@
-//===- vulkan-runtime-wrappers.cpp - MLIR Vulkan runner wrapper library ---===//
+//===- VulkanRuntimeWrappers.cpp - MLIR Vulkan runner wrapper library -----===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -113,23 +113,12 @@ struct MemRefDescriptor {
   int64_t strides[N];
 };
 
-template <typename T, uint32_t S>
-void bindMemRef(void *vkRuntimeManager, DescriptorSetIndex setIndex,
-                BindingIndex bindIndex, MemRefDescriptor<T, S> *ptr) {
-  uint32_t size = sizeof(T);
-  for (unsigned i = 0; i < S; i++)
-    size *= ptr->sizes[i];
-  VulkanHostMemoryBuffer memBuffer{ptr->aligned, size};
-  reinterpret_cast<VulkanRuntimeManager *>(vkRuntimeManager)
-      ->setResourceData(setIndex, bindIndex, memBuffer);
-}
-
 extern "C" {
 
 //===----------------------------------------------------------------------===//
 //
-// New wrappers, intended for mlir-cpu-runner. Calls to these are generated by
-// GPUToLLVMConversionPass.
+// Wrappers intended for mlir-cpu-runner. Uses of GPU dialect operations get
+// lowered to calls to these functions by GPUToLLVMConversionPass.
 //
 //===----------------------------------------------------------------------===//
 
@@ -169,26 +158,21 @@ mgpuLaunchKernel(void *vkKernel, size_t gridX, size_t gridY, size_t gridZ,
                  void ** /*extra*/, size_t paramsCount) {
   auto manager = static_cast<VulkanRuntimeManager *>(vkRuntimeManager);
 
-  // The non-bare-pointer memref ABI interacts badly with mgpuLaunchKernel's
-  // signature:
-  // - The memref descriptor struct gets split into several elements, each
-  //   passed as their own "param".
-  // - No metadata is provided as to the rank or element type/size of a memref.
-  //   Here we assume that all MemRefs have rank 1 and an element size of
-  //   4 bytes. This means each descriptor struct will have five members.
-  // TODO(https://github.com/llvm/llvm-project/issues/73457): Refactor the
-  //       ABI/API of mgpuLaunchKernel to use a different ABI for memrefs, so
-  //       that other memref types can also be used. This will allow migrating
-  //       the remaining tests and removal of mlir-vulkan-runner.
-  const size_t paramsPerMemRef = 5;
+  // GpuToLLVMConversionPass with the kernelBarePtrCallConv and
+  // kernelIntersperseSizeCallConv options will set up the params array like:
+  // { &memref_ptr0, &memref_size0, &memref_ptr1, &memref_size1, ... }
+  const size_t paramsPerMemRef = 2;
   if (paramsCount % paramsPerMemRef != 0) {
-    abort();
+    abort(); // This would indicate a serious calling convention mismatch.
   }
   const DescriptorSetIndex setIndex = 0;
   BindingIndex bindIndex = 0;
   for (size_t i = 0; i < paramsCount; i += paramsPerMemRef) {
-    auto memref = static_cast<MemRefDescriptor<uint32_t, 1> *>(params[i]);
-    bindMemRef<uint32_t, 1>(manager, setIndex, bindIndex, memref);
+    void *memrefBufferBasePtr = *static_cast<void **>(params[i + 0]);
+    size_t memrefBufferSize = *static_cast<size_t *>(params[i + 1]);
+    VulkanHostMemoryBuffer memBuffer{memrefBufferBasePtr,
+                                     static_cast<uint32_t>(memrefBufferSize)};
+    manager->setResourceData(setIndex, bindIndex, memBuffer);
     ++bindIndex;
   }
 
@@ -208,68 +192,10 @@ mgpuLaunchKernel(void *vkKernel, size_t gridX, size_t gridY, size_t gridZ,
 
 //===----------------------------------------------------------------------===//
 //
-// Old wrappers, intended for mlir-vulkan-runner. Calls to these are generated
-// by LaunchFuncToVulkanCallsPass.
+// Miscellaneous utility functions that can be directly used by tests.
 //
 //===----------------------------------------------------------------------===//
 
-/// Initializes `VulkanRuntimeManager` and returns a pointer to it.
-VULKAN_WRAPPER_SYMBOL_EXPORT void *initVulkan() {
-  return new VulkanRuntimeManager();
-}
-
-/// Deinitializes `VulkanRuntimeManager` by the given pointer.
-VULKAN_WRAPPER_SYMBOL_EXPORT void deinitVulkan(void *vkRuntimeManager) {
-  delete reinterpret_cast<VulkanRuntimeManager *>(vkRuntimeManager);
-}
-
-VULKAN_WRAPPER_SYMBOL_EXPORT void runOnVulkan(void *vkRuntimeManager) {
-  reinterpret_cast<VulkanRuntimeManager *>(vkRuntimeManager)->runOnVulkan();
-}
-
-VULKAN_WRAPPER_SYMBOL_EXPORT void setEntryPoint(void *vkRuntimeManager,
-                                                const char *entryPoint) {
-  reinterpret_cast<VulkanRuntimeManager *>(vkRuntimeManager)
-      ->setEntryPoint(entryPoint);
-}
-
-VULKAN_WRAPPER_SYMBOL_EXPORT void
-setNumWorkGroups(void *vkRuntimeManager, uint32_t x, uint32_t y, uint32_t z) {
-  reinterpret_cast<VulkanRuntimeManager *>(vkRuntimeManager)
-      ->setNumWorkGroups({x, y, z});
-}
-
-VULKAN_WRAPPER_SYMBOL_EXPORT void
-setBinaryShader(void *vkRuntimeManager, uint8_t *shader, uint32_t size) {
-  reinterpret_cast<VulkanRuntimeManager *>(vkRuntimeManager)
-      ->setShaderModule(shader, size);
-}
-
-/// Binds the given memref to the given descriptor set and descriptor
-/// index.
-#define DECLARE_BIND_MEMREF(size, type, typeName)                              \
-  VULKAN_WRAPPER_SYMBOL_EXPORT void bindMemRef##size##D##typeName(             \
-      void *vkRuntimeManager, DescriptorSetIndex setIndex,                     \
-      BindingIndex bindIndex, MemRefDescriptor<type, size> *ptr) {             \
-    bindMemRef<type, size>(vkRuntimeManager, setIndex, bindIndex, ptr);        \
-  }
-
-DECLARE_BIND_MEMREF(1, float, Float)
-DECLARE_BIND_MEMREF(2, float, Float)
-DECLARE_BIND_MEMREF(3, float, Float)
-DECLARE_BIND_MEMREF(1, int32_t, Int32)
-DECLARE_BIND_MEMREF(2, int32_t, Int32)
-DECLARE_BIND_MEMREF(3, int32_t, Int32)
-DECLARE_BIND_MEMREF(1, int16_t, Int16)
-DECLARE_BIND_MEMREF(2, int16_t, Int16)
-DECLARE_BIND_MEMREF(3, int16_t, Int16)
-DECLARE_BIND_MEMREF(1, int8_t, Int8)
-DECLARE_BIND_MEMREF(2, int8_t, Int8)
-DECLARE_BIND_MEMREF(3, int8_t, Int8)
-DECLARE_BIND_MEMREF(1, int16_t, Half)
-DECLARE_BIND_MEMREF(2, int16_t, Half)
-DECLARE_BIND_MEMREF(3, int16_t, Half)
-
 /// Fills the given 1D float memref with the given float value.
 VULKAN_WRAPPER_SYMBOL_EXPORT void
 _mlir_ciface_fillResource1DFloat(MemRefDescriptor<float, 1> *ptr, // NOLINT
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index e776722a818d7..29089cb28a5a8 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1889,59 +1889,6 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
   llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
       findAllocaInsertPoint(builder, moduleTranslation);
 
-  // The following loop is workaround until we private ops' alloca regions to be
-  // "pure". See
-  // https://discourse.llvm.org/t/rfc-openmp-supporting-delayed-task-execution-with-firstprivate-variables/83084/7
-  // and https://discourse.llvm.org/t/delayed-privatization-for-omp-wsloop/83989
-  // for more info.
-  for (auto [privateVar, privateDeclOp] :
-       llvm::zip_equal(mlirPrivateVars, privateDecls)) {
-    llvm::Value *llvmValue = moduleTranslation.lookupValue(privateVar);
-    bool isAllocArgUsed =
-        !privateDeclOp.getAllocRegion().args_begin()->use_empty();
-
-    // If the alloc region argument is not used, we can skip the workaround.
-    if (!isAllocArgUsed)
-      continue;
-
-    llvm::Instruction *definingInst =
-        llvm::dyn_cast<llvm::Instruction>(llvmValue);
-
-    // If the alloc region argument is not defined by an op, it has to dominate
-    // the current alloc IP. So we skip the workaround.
-    if (!definingInst)
-      continue;
-
-    llvm::BasicBlock *definingBlock = definingInst->getParent();
-    llvm::Function *definingFun = definingBlock->getParent();
-    llvm::Function *allocaFun = allocaIP.getBlock()->getParent();
-
-    // If the alloc region argument is defined in a different function that
-    // current one where allocs are being inserted (for example, we are building
-    // the outlined function of a target region), we skip the workaround.
-    if (definingFun != allocaFun)
-      continue;
-
-    llvm::DominatorTree dt(*definingFun);
-    // If the defining instruction of the alloc region argument dominates the
-    // alloca insertion point already, we can skip the workaround.
-    if (dt.dominates(definingInst, allocaIP.getPoint()))
-      continue;
-
-    // If all the above conditions are violated, then we have to move the alloca
-    // insertion point below the defining instruction.
-
-    if (definingBlock->getTerminator() == nullptr) {
-      assert(builder.GetInsertBlock() == definingBlock);
-      builder.SetInsertPoint(splitBB(llvm::OpenMPIRBuilder::InsertPointTy(
-                                         definingBlock, definingBlock->end()),
-                                     true, "omp.region.after_defining_block"));
-    }
-
-    allocaIP = llvm::OpenMPIRBuilder::InsertPointTy(
-        definingBlock, definingBlock->getTerminator()->getIterator());
-  }
-
   SmallVector<llvm::Value *> privateReductionVariables(
       wsloopOp.getNumReductionVars());
 
diff --git a/mlir/test/CMakeLists.txt b/mlir/test/CMakeLists.txt
index 58d16a657297e..69c2e59786892 100644
--- a/mlir/test/CMakeLists.txt
+++ b/mlir/test/CMakeLists.txt
@@ -206,7 +206,7 @@ endif()
 
 if(MLIR_ENABLE_VULKAN_RUNNER)
   list(APPEND MLIR_TEST_DEPENDS
-    mlir-vulkan-runner
+    mlir_vulkan_runtime
   )
 endif()
 
diff --git a/mlir/test/Conversion/GPUCommon/lower-launch-func-bare-ptr-intersperse-size.mlir b/mlir/test/Conversion/GPUCommon/lower-launch-func-bare-ptr-intersperse-size.mlir
new file mode 100644
index 0000000000000..171b13da22713
--- /dev/null
+++ b/mlir/test/Conversion/GPUCommon/lower-launch-func-bare-ptr-intersperse-size.mlir
@@ -0,0 +1,25 @@
+// RUN: mlir-opt %s --gpu-to-llvm="use-bare-pointers-for-kernels=1 intersperse-sizes-for-kernels=1" -split-input-file | FileCheck %s
+
+module attributes {gpu.container_module, spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]>, #spirv.resource_limits<>>} {
+  llvm.func @malloc(i64) -> !llvm.ptr
+  gpu.binary @kernels  [#gpu.object<#spirv.target_env<#spirv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]>, #spirv.resource_limits<>>, "">]
+  func.func @main() attributes {llvm.emit_c_interface} {
+    // CHECK: [[RANK1UMD:%.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+    %rank1UndefMemrefDescriptor = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+    // CHECK: [[RANK2UMD:%.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
+    %rank2UndefMemrefDescriptor = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
+    %c1 = arith.constant 1 : index
+    // CHECK: [[PTR1:%.*]] = llvm.extractvalue [[RANK1UMD]][1]
+    // CHECK: [[PTR2:%.*]] = llvm.extractvalue [[RANK2UMD]][1]
+    // CHECK: [[PTR3:%.*]] = llvm.extractvalue [[RANK2UMD]][1]
+    // CHECK: [[SIZE1:%.*]] = llvm.mlir.constant(32 : index) : i64
+    // CHECK: [[SIZE2:%.*]] = llvm.mlir.constant(256 : index) : i64
+    // CHECK: [[SIZE3:%.*]] = llvm.mlir.constant(48 : index) : i64
+    %6 = builtin.unrealized_conversion_cast %rank1UndefMemrefDescriptor : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> to memref<8xf32>
+    %10 = builtin.unrealized_conversion_cast %rank2UndefMemrefDescriptor : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> to memref<8x8xi32>
+    %14 = builtin.unrealized_conversion_cast %rank2UndefMemrefDescriptor : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> to memref<4x12xi8>
+    // CHECK: gpu.launch_func  @kernels::@kernel_add blocks in ({{.*}}) threads in ({{.*}}) : i64 args([[PTR1]] : !llvm.ptr, [[SIZE1]] : i64, [[PTR2]] : !llvm.ptr, [[SIZE2]] : i64, [[PTR3]] : !llvm.ptr, [[SIZE3]] : i64)
+    gpu.launch_func  @kernels::@kernel_add blocks in (%c1, %c1, %c1) threads in (%c1, %c1, %c1)  args(%6 : memref<8xf32>, %10 : memref<8x8xi32>, %14 : memref<4x12xi8>)
+    return
+  }
+}
diff --git a/mlir/test/Conversion/GPUCommon/transfer_write.mlir b/mlir/test/Conversion/GPUCommon/transfer_write.mlir
index 2242786fe6759..4d2ae8c39240c 100644
--- a/mlir/test/Conversion/GPUCommon/transfer_write.mlir
+++ b/mlir/test/Conversion/GPUCommon/transfer_write.mlir
@@ -1,13 +1,15 @@
 // RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s
 
-  func.func @warp_extract(%arg0: index, %arg1: memref<1024x1024xf32>, %arg2: index, %arg3: vector<1xf32>) {
+// CHECK-LABEL: @warp_extract
+// CHECK-SAME: %[[VEC:[a-zA-Z0-9_]+]]: vector<1xf32>
+// CHECK:%[[BASE:[0-9]+]] = llvm.extractvalue
+// CHECK:%[[PTR:[0-9]+]] = llvm.getelementptr %[[BASE]]
+// CHECK:llvm.store %[[VEC]], %[[PTR]] {alignment = 4 : i64} : vector<1xf32>, !llvm.ptr
+
+func.func @warp_extract(%arg0: index, %arg1: memref<1024x1024xf32>, %arg2: vector<1xf32>) {
     %c0 = arith.constant 0 : index
     gpu.warp_execute_on_lane_0(%arg0)[32] {
-      // CHECK:%[[val:[0-9]+]] = llvm.extractelement
-      // CHECK:%[[base:[0-9]+]] = llvm.extractvalue
-      // CHECK:%[[ptr:[0-9]+]] = llvm.getelementptr %[[base]]
-      // CHECK:llvm.store %[[val]], %[[ptr]]
-      vector.transfer_write %arg3, %arg1[%c0, %c0] {in_bounds = [true]} : vector<1xf32>, memref<1024x1024xf32>
+      vector.transfer_write %arg2, %arg1[%c0, %c0] {in_bounds = [true]} : vector<1xf32>, memref<1024x1024xf32>
     }
     return
   }
diff --git a/mlir/test/Conversion/GPUToVulkan/invoke-vulkan.mlir b/mlir/test/Conversion/GPUToVulkan/invoke-vulkan.mlir
deleted file mode 100644
index fe8a36ee29a9f..0000000000000
--- a/mlir/test/Conversion/GPUToVulkan/invoke-vulkan.mlir
+++ /dev/null
@@ -1,62 +0,0 @@
-// RUN: mlir-opt %s -launch-func-to-vulkan | FileCheck %s
-
-// CHECK: llvm.mlir.global internal constant @kernel_spv_entry_point_name
-// CHECK: llvm.mlir.global internal constant @SPIRV_BIN
-// CHECK: %[[Vulkan_Runtime_ptr:.*]] = llvm.call @initVulkan() : () -> !llvm.ptr
-// CHECK: %[[addressof_SPIRV_BIN:.*]] = llvm.mlir.addressof @SPIRV_BIN
-// CHECK: %[[SPIRV_BIN_ptr:.*]] = llvm.getelementptr %[[addressof_SPIRV_BIN]]
-// CHECK: %[[SPIRV_BIN_size:.*]] = llvm.mlir.constant
-// CHECK: llvm.call @bindMemRef1DFloat(%[[Vulkan_Runtime_ptr]], %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, i32, i32, !llvm.ptr) -> ()
-// CHECK: llvm.call @setBinaryShader(%[[Vulkan_Runtime_ptr]], %[[SPIRV_BIN_ptr]], %[[SPIRV_BIN_size]]) : (!llvm.ptr, !llvm.ptr, i32) -> ()
-// CHECK: %[[addressof_entry_point:.*]] = llvm.mlir.addressof @kernel_spv_entry_point_name
-// CHECK: %[[entry_point_ptr:.*]] = llvm.getelementptr %[[addressof_entry_point]]
-// CHECK: llvm.call @setEntryPoint(%[[Vulkan_Runtime_ptr]], %[[entry_point_ptr]]) : (!llvm.ptr, !llvm.ptr) -> ()
-// CHECK: llvm.call @setNumWorkGroups(%[[Vulkan_Runtime_ptr]], %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, i64, i64, i64) -> ()
-// CHECK: llvm.call @runOnVulkan(%[[Vulkan_Runtime_ptr]]) : (!llvm.ptr) -> ()
-// CHECK: llvm.call @deinitVulkan(%[[Vulkan_Runtime_ptr]]) : (!llvm.ptr) -> ()
-
-// CHECK: llvm.func @bindMemRef1DHalf(!llvm.ptr, i32, i32, !llvm.ptr)
-
-module attributes {gpu.container_module} {
-  llvm.func @malloc(i64) -> !llvm.ptr
-  llvm.func @foo() {
-    %0 = llvm.mlir.constant(12 : index) : i64
-    %1 = llvm.mlir.zero : !llvm.ptr
-    %2 = llvm.mlir.constant(1 : index) : i64
-    %3 = llvm.getelementptr %1[%2] : (!llvm.ptr, i64) -> !llvm.ptr, f32
-    %4 = llvm.ptrtoint %3 : !llvm.ptr to i64
-    %5 = llvm.mul %0, %4 : i64
-    %6 = llvm.call @malloc(%5) : (i64) -> !llvm.ptr
-    %8 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
-    %9 = llvm.insertvalue %6, %8[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
-    %10 = llvm.insertvalue %6, %9[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
-    %11 = llvm.mlir.constant(0 : index) : i64
-    %12 = llvm.insertvalue %11, %10[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
-    %13 = llvm.mlir.constant(1 : index) : i64
-    %14 = llvm.insertvalue %0, %12[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
-    %15 = llvm.insertvalue %13, %14[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
-    %16 = llvm.mlir.constant(1 : index) : i64
-    %17 = llvm.extractvalue %15[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
-    %18 = llvm.extractvalue %15[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
-    %19 = llvm.extractvalue %15[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
-    %20 = llvm.extractvalue %15[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
-    %21 = llvm.extractvalue %15[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
-    llvm.call @vulkanLaunch(%16, %16, %16, %17, %18, %19, %20, %21) {spirv_blob = "\03\02#\07\00", spirv_element_types = [f32], spirv_entry_point = "kernel"}
-    : (i64, i64, i64, !llvm.ptr, !llvm.ptr, i64, i64, i64) -> ()
-    llvm.return
-  }
-  llvm.func @vulkanLaunch(%arg0: i64, %arg1: i64, %arg2: i64, %arg6: !llvm.ptr, %arg7: !llvm.ptr, %arg8: i64, %arg9: i64, %arg10: i64) {
-    %0 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
-    %1 = llvm.insertvalue %arg6, %0[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
-    %2 = llvm.insertvalue %arg7, %1[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
-    %3 = llvm.insertvalue %arg8, %2[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
-    %4 = llvm.insertvalue %arg9, %3[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
-    %5 = llvm.insertvalue %arg10, %4[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
-    %6 = llvm.mlir.constant(1 : index) : i64
-    %7 = llvm.alloca %6 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr
-    llvm.store %5, %7 : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>, !llvm.ptr
-    llvm.call @_mlir_ciface_vulkanLaunch(%arg0, %arg1, %arg2, %7) : (i64, i64, i64, !llvm.ptr) -> ()
-    llvm.return
-  }
-  llvm.func @_mlir_ciface_vulkanLaunch(i64, i64, i64, !llvm.ptr)
-}
diff --git a/mlir/test/Conversion/GPUToVulkan/lower-gpu-launch-vulkan-launch.mlir b/mlir/test/Conversion/GPUToVulkan/lower-gpu-launch-vulkan-launch.mlir
deleted file mode 100644
index 96ee1866517e6..0000000000000
--- a/mlir/test/Conversion/GPUToVulkan/lower-gpu-launch-vulkan-launch.mlir
+++ /dev/null
@@ -1,35 +0,0 @@
-// RUN: mlir-opt %s -pass-pipeline='builtin.module(spirv-attach-target{ver=v1.0 caps=Shader exts=SPV_KHR_storage_buffer_storage_class},gpu-module-to-binary,convert-gpu-launch-to-vulkan-launch)' | FileCheck %s
-
-// CHECK: %[[resource:.*]] = memref.alloc() : memref<12xf32>
-// CHECK: %[[index:.*]] = arith.constant 1 : index
-// CHECK: call @vulkanLaunch(%[[index]], %[[index]], %[[index]], %[[resource]]) {spirv_blob = "{{.*}}", spirv_element_types = [f32], spirv_entry_point = "kernel"}
-
-module attributes {gpu.container_module} {
-  gpu.module @kernels {
-    spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> {
-      spirv.GlobalVariable @kernel_arg_0 bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.array<12 x f32, stride=4> [0])>, StorageBuffer>
-      spirv.func @kernel() "None" attributes {workgroup_attributions = 0 : i64} {
-        %0 = spirv.mlir.addressof @kernel_arg_0 : !spirv.ptr<!spirv.struct<(!spirv.array<12 x f32, stride=4> [0])>, StorageBuffer>
-        %2 = spirv.Constant 0 : i32
-        %3 = spirv.mlir.addressof @kernel_arg_0 : !spirv.ptr<!spirv.struct<(!spirv.array<12 x f32, stride=4> [0])>, StorageBuffer>
-        %4 = spirv.AccessChain %0[%2, %2] : !spirv.ptr<!spirv.struct<(!spirv.array<12 x f32, stride=4> [0])>, StorageBuffer>, i32, i32 -> !spirv.ptr<f32, StorageBuffer>
-        %5 = spirv.Load "StorageBuffer" %4 : f32
-        spirv.Return
-      }
-      spirv.EntryPoint "GLCompute" @kernel
-      spirv.ExecutionMode @kernel "LocalSize", 1, 1, 1
-    }
-    gpu.func @kernel(%arg0: memref<12xf32>) kernel {
-      gpu.return
-    }
-  }
-  func.func @foo() {
-    %0 = memref.alloc() : memref<12xf32>
-    %c1 = arith.constant 1 : index
-    gpu.launch_func @kernels::@kernel
-        blocks in(%c1, %c1, %c1)
-        threads in(%c1, %c1, %c1)
-        args(%0 : memref<12xf32>)
-    return
-  }
-}
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
index 931cc36c9d4a8..29bed9aae5682 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
@@ -3292,13 +3292,17 @@ func.func @load_0d(%memref : memref<200x100xf32>, %i : index, %j : index) -> vec
 }
 
 // CHECK-LABEL: func @load_0d
-// CHECK: %[[LOAD:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}]
-// CHECK: %[[VEC:.*]] = llvm.mlir.undef : vector<1xf32>
-// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i32) : i32
-// CHECK: %[[INSERTED:.*]] = llvm.insertelement %[[LOAD]], %[[VEC]][%[[C0]] : i32] : vector<1xf32>
-// CHECK: %[[CAST:.*]] = builtin.unrealized_conversion_cast %[[INSERTED]] : vector<1xf32> to vector<f32>
-// CHECK: return %[[CAST]] : vector<f32>
-
+// CHECK: %[[J:.*]] = builtin.unrealized_conversion_cast %{{.*}} : index to i64
+// CHECK: %[[I:.*]] = builtin.unrealized_conversion_cast %{{.*}} : index to i64
+// CHECK: %[[CAST_MEMREF:.*]] = builtin.unrealized_conversion_cast %{{.*}} : memref<200x100xf32> to !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[REF:.*]] = llvm.extractvalue %[[CAST_MEMREF]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64
+// CHECK: %[[MUL:.*]] = llvm.mul %[[I]], %[[C100]] : i64
+// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %[[J]] : i64
+// CHECK: %[[ADDR:.*]] = llvm.getelementptr %[[REF]][%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+// CHECK: %[[LOAD:.*]] = llvm.load %[[ADDR]] {alignment = 4 : i64} : !llvm.ptr -> vector<1xf32>
+// CHECK: %[[RES:.*]] = builtin.unrealized_conversion_cast %[[LOAD]] : vector<1xf32> to vector<f32>
+// CHECK: return %[[RES]] : vector<f32>
 // -----
 
 //===----------------------------------------------------------------------===//
@@ -3392,11 +3396,18 @@ func.func @store_0d(%memref : memref<200x100xf32>, %i : index, %j : index) {
 }
 
 // CHECK-LABEL: func @store_0d
-// CHECK: %[[VAL:.*]] = arith.constant dense<1.100000e+01> : vector<f32>
-// CHECK: %[[CAST:.*]] = builtin.unrealized_conversion_cast %[[VAL]] : vector<f32> to vector<1xf32>
-// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : index) : i64
-// CHECK: %[[EXTRACTED:.*]] = llvm.extractelement %[[CAST]][%[[C0]] : i64] : vector<1xf32>
-// CHECK: memref.store %[[EXTRACTED]], %{{.*}}[%{{.*}}, %{{.*}}]
+// CHECK: %[[J:.*]] = builtin.unrealized_conversion_cast %{{.*}} : index to i64
+// CHECK: %[[I:.*]] = builtin.unrealized_conversion_cast %{{.*}} : index to i64
+// CHECK: %[[CAST_MEMREF:.*]] = builtin.unrealized_conversion_cast %{{.*}} : memref<200x100xf32> to !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[CST:.*]] = arith.constant dense<1.100000e+01> : vector<f32>
+// CHECK: %[[VAL:.*]] = builtin.unrealized_conversion_cast %[[CST]] : vector<f32> to vector<1xf32>
+// CHECK: %[[REF:.*]] = llvm.extractvalue %[[CAST_MEMREF]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64
+// CHECK: %[[MUL:.*]] = llvm.mul %[[I]], %[[C100]] : i64
+// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %[[J]] : i64
+// CHECK: %[[ADDR:.*]] = llvm.getelementptr %[[REF]][%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+// CHECK: llvm.store %[[VAL]], %[[ADDR]] {alignment = 4 : i64} : vector<1xf32>, !llvm.ptr
+// CHECK: return
 
 // -----
 
diff --git a/mlir/test/Dialect/LLVMIR/rocdl.mlir b/mlir/test/Dialect/LLVMIR/rocdl.mlir
index 92789246edb4f..c80ebebaafe3a 100644
--- a/mlir/test/Dialect/LLVMIR/rocdl.mlir
+++ b/mlir/test/Dialect/LLVMIR/rocdl.mlir
@@ -227,6 +227,32 @@ func.func @rocdl.xdlops(%arg0 : f32, %arg1 : f32,
   llvm.return
 }
 
+llvm.func @rocdl.ds.read.tr(%ptr : !llvm.ptr<3>) -> vector<4xf16> {
+  // CHECK-LABEL: rocdl.ds.read.tr
+  // CHECK: rocdl.ds.read.tr4.b64 {{.*}} : <3> -> vector<2xi32>
+  %r0 = rocdl.ds.read.tr4.b64 %ptr : !llvm.ptr<3> -> vector<2xi32>
+  // CHECK: rocdl.ds.read.tr6.b96 {{.*}} : <3> -> vector<3xi32>
+  %r1 = rocdl.ds.read.tr6.b96 %ptr : !llvm.ptr<3> -> vector<3xi32>
+  // CHECK: rocdl.ds.read.tr8.b64 {{.*}} : <3> -> vector<2xi32>
+  %r2 = rocdl.ds.read.tr8.b64 %ptr : !llvm.ptr<3> -> vector<2xi32>
+  // CHECK: rocdl.ds.read.tr16.b64 {{.*}} : <3> -> vector<4xf16>
+  %r3 = rocdl.ds.read.tr16.b64 %ptr : !llvm.ptr<3> -> vector<4xf16>
+  // CHECK: rocdl.ds.read.tr16.b64 {{.*}} : <3> -> vector<4xbf16>
+  %r4 = rocdl.ds.read.tr16.b64 %ptr : !llvm.ptr<3> -> vector<4xbf16>
+  llvm.return %r3 : vector<4xf16>
+}
+
+llvm.func @rocdl.global.load.lds(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
+  %aux = llvm.mlir.constant(0 : i32) : i32
+  %offset = llvm.mlir.constant(0 : i32) : i32
+  %size = llvm.mlir.constant(10 : i32) : i32
+
+  // CHECK: rocdl.global.load.lds %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}
+  rocdl.global.load.lds %src, %dst, %size, %offset, %aux
+
+  llvm.return
+}
+
 llvm.func @rocdl.make.buffer.rsrc(%ptr : !llvm.ptr,
                                   %stride : i16,
                                   %numRecords : i32,
diff --git a/mlir/test/Dialect/Vector/vector-transfer-to-vector-load-store.mlir b/mlir/test/Dialect/Vector/vector-transfer-to-vector-load-store.mlir
index f90111b4c8861..fd50acf03e79b 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-to-vector-load-store.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-to-vector-load-store.mlir
@@ -6,16 +6,13 @@
 func.func @vector_transfer_ops_0d_memref(%mem: memref<f32>, %vec: vector<1x1x1xf32>) {
     %f0 = arith.constant 0.0 : f32
 
-//  CHECK-NEXT:   %[[S:.*]] = memref.load %[[MEM]][] : memref<f32>
-//  CHECK-NEXT:   %[[V:.*]] = vector.broadcast %[[S]] : f32 to vector<f32>
+//  CHECK-NEXT:   %[[S:.*]] = vector.load %[[MEM]][] : memref<f32>, vector<f32>
     %0 = vector.transfer_read %mem[], %f0 : memref<f32>, vector<f32>
 
-//  CHECK-NEXT:   %[[SS:.*]] = vector.extractelement %[[V]][] : vector<f32>
-//  CHECK-NEXT:   memref.store %[[SS]], %[[MEM]][] : memref<f32>
+//  CHECK-NEXT:   vector.store %[[S]], %[[MEM]][] : memref<f32>, vector<f32>
     vector.transfer_write %0, %mem[] : vector<f32>, memref<f32>
 
-//  CHECK-NEXT:   %[[VV:.*]] = vector.extract %arg1[0, 0, 0] : f32 from vector<1x1x1xf32>
-//  CHECK-NEXT:   memref.store %[[VV]], %[[MEM]][] : memref<f32>
+//  CHECK-NEXT:   vector.store %[[VEC]], %[[MEM]][] : memref<f32>, vector<1x1x1xf32>
     vector.store %vec, %mem[] : memref<f32>, vector<1x1x1xf32>
 
     return
@@ -191,8 +188,8 @@ func.func @transfer_perm_map(%mem : memref<8x8xf32>, %idx : index) -> vector<4xf
 // CHECK-LABEL:   func @transfer_broadcasting(
 // CHECK-SAME:      %[[MEM:.*]]: memref<8x8xf32>,
 // CHECK-SAME:      %[[IDX:.*]]: index) -> vector<4xf32> {
-// CHECK-NEXT:      %[[LOAD:.*]] = memref.load %[[MEM]][%[[IDX]], %[[IDX]]] : memref<8x8xf32>
-// CHECK-NEXT:      %[[RES:.*]] = vector.broadcast %[[LOAD]] : f32 to vector<4xf32>
+// CHECK-NEXT:      %[[LOAD:.*]] = vector.load %[[MEM]][%[[IDX]], %[[IDX]]] : memref<8x8xf32>, vector<1xf32>
+// CHECK-NEXT:      %[[RES:.*]] = vector.broadcast %[[LOAD]] : vector<1xf32> to vector<4xf32>
 // CHECK-NEXT:      return %[[RES]] : vector<4xf32>
 // CHECK-NEXT:    }
 
@@ -208,8 +205,7 @@ func.func @transfer_broadcasting(%mem : memref<8x8xf32>, %idx : index) -> vector
 // CHECK-LABEL:   func @transfer_scalar(
 // CHECK-SAME:      %[[MEM:.*]]: memref<?x?xf32>,
 // CHECK-SAME:      %[[IDX:.*]]: index) -> vector<1xf32> {
-// CHECK-NEXT:      %[[LOAD:.*]] = memref.load %[[MEM]][%[[IDX]], %[[IDX]]] : memref<?x?xf32>
-// CHECK-NEXT:      %[[RES:.*]] = vector.broadcast %[[LOAD]] : f32 to vector<1xf32>
+// CHECK-NEXT:      %[[RES:.*]] = vector.load %[[MEM]][%[[IDX]], %[[IDX]]] : memref<?x?xf32>, vector<1xf32>
 // CHECK-NEXT:      return %[[RES]] : vector<1xf32>
 // CHECK-NEXT:    }
 func.func @transfer_scalar(%mem : memref<?x?xf32>, %idx : index) -> vector<1xf32> {
@@ -222,8 +218,8 @@ func.func @transfer_scalar(%mem : memref<?x?xf32>, %idx : index) -> vector<1xf32
 // CHECK-LABEL:   func @transfer_broadcasting_2D(
 // CHECK-SAME:      %[[MEM:.*]]: memref<8x8xf32>,
 // CHECK-SAME:      %[[IDX:.*]]: index) -> vector<4x4xf32> {
-// CHECK-NEXT:      %[[LOAD:.*]] = memref.load %[[MEM]][%[[IDX]], %[[IDX]]] : memref<8x8xf32>
-// CHECK-NEXT:      %[[RES:.*]] = vector.broadcast %[[LOAD]] : f32 to vector<4x4xf32>
+// CHECK-NEXT:      %[[LOAD:.*]] = vector.load %[[MEM]][%[[IDX]], %[[IDX]]] : memref<8x8xf32>, vector<1x1xf32>
+// CHECK-NEXT:      %[[RES:.*]] = vector.broadcast %[[LOAD]] : vector<1x1xf32> to vector<4x4xf32>
 // CHECK-NEXT:      return %[[RES]] : vector<4x4xf32>
 // CHECK-NEXT:    }
 
@@ -322,8 +318,8 @@ func.func @transfer_read_permutations(%mem_0 : memref<?x?xf32>, %mem_1 : memref<
 // CHECK: vector.transpose %{{.*}}, [2, 1, 3, 0] : vector<16x14x7x8xf32> to vector<7x14x8x16xf32>
 
   %6 = vector.transfer_read %mem_0[%c0, %c0], %cst {in_bounds = [true], permutation_map = #map6} : memref<?x?xf32>, vector<8xf32>
-// CHECK: memref.load %{{.*}}[%[[C0]], %[[C0]]] : memref<?x?xf32>
-// CHECK: vector.broadcast %{{.*}} : f32 to vector<8xf32>
+// CHECK: vector.load %{{.*}}[%[[C0]], %[[C0]]] : memref<?x?xf32>, vector<1xf32>
+// CHECK: vector.broadcast %{{.*}} : vector<1xf32> to vector<8xf32>
 
   return %0, %1, %2, %3, %4, %5, %6 : vector<7x14x8x16xf32>, vector<7x14x8x16xf32>,
          vector<7x14x8x16xf32>, vector<7x14x8x16xf32>, vector<7x14x8x16xf32>,
diff --git a/mlir/test/mlir-vulkan-runner/addf.mlir b/mlir/test/Integration/GPU/Vulkan/addf.mlir
similarity index 91%
rename from mlir/test/mlir-vulkan-runner/addf.mlir
rename to mlir/test/Integration/GPU/Vulkan/addf.mlir
index 71f87a8b0d5c8..f4d2463d413b8 100644
--- a/mlir/test/mlir-vulkan-runner/addf.mlir
+++ b/mlir/test/Integration/GPU/Vulkan/addf.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt %s -test-vulkan-runner-pipeline=to-llvm \
-// RUN:   | mlir-cpu-runner - --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils --entry-point-result=void | FileCheck %s
+// RUN: mlir-opt %s -test-vulkan-runner-pipeline \
+// RUN:   | mlir-cpu-runner - --shared-libs=%mlir_vulkan_runtime,%mlir_runner_utils --entry-point-result=void | FileCheck %s
 
 // CHECK: [3.3,  3.3,  3.3,  3.3,  3.3,  3.3,  3.3,  3.3]
 module attributes {
diff --git a/mlir/test/mlir-vulkan-runner/addf_if.mlir b/mlir/test/Integration/GPU/Vulkan/addf_if.mlir
similarity index 91%
rename from mlir/test/mlir-vulkan-runner/addf_if.mlir
rename to mlir/test/Integration/GPU/Vulkan/addf_if.mlir
index 6fe51a83482dc..2512d0f8e6efa 100644
--- a/mlir/test/mlir-vulkan-runner/addf_if.mlir
+++ b/mlir/test/Integration/GPU/Vulkan/addf_if.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt %s -test-vulkan-runner-pipeline=to-llvm \
-// RUN:   | mlir-cpu-runner - --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils --entry-point-result=void | FileCheck %s
+// RUN: mlir-opt %s -test-vulkan-runner-pipeline \
+// RUN:   | mlir-cpu-runner - --shared-libs=%mlir_vulkan_runtime,%mlir_runner_utils --entry-point-result=void | FileCheck %s
 
 // CHECK: [3.3,  3.3,  3.3,  3.3,  0,  0,  0,  0]
 module attributes {
diff --git a/mlir/test/mlir-vulkan-runner/addi.mlir b/mlir/test/Integration/GPU/Vulkan/addi.mlir
similarity index 94%
rename from mlir/test/mlir-vulkan-runner/addi.mlir
rename to mlir/test/Integration/GPU/Vulkan/addi.mlir
index 7e212a4fb179c..abf695c61f3b3 100644
--- a/mlir/test/mlir-vulkan-runner/addi.mlir
+++ b/mlir/test/Integration/GPU/Vulkan/addi.mlir
@@ -1,5 +1,5 @@
 // RUN: mlir-opt %s -test-vulkan-runner-pipeline \
-// RUN:   | mlir-vulkan-runner - --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils --entry-point-result=void | FileCheck %s
+// RUN:   | mlir-cpu-runner - --shared-libs=%mlir_vulkan_runtime,%mlir_runner_utils --entry-point-result=void | FileCheck %s
 
 // CHECK-COUNT-64: [3, 3, 3, 3, 3, 3, 3, 3]
 module attributes {
diff --git a/mlir/test/mlir-vulkan-runner/addi8.mlir b/mlir/test/Integration/GPU/Vulkan/addi8.mlir
similarity index 94%
rename from mlir/test/mlir-vulkan-runner/addi8.mlir
rename to mlir/test/Integration/GPU/Vulkan/addi8.mlir
index e0b1a8e8875c0..fd43422fbafad 100644
--- a/mlir/test/mlir-vulkan-runner/addi8.mlir
+++ b/mlir/test/Integration/GPU/Vulkan/addi8.mlir
@@ -1,5 +1,5 @@
 // RUN: mlir-opt %s -test-vulkan-runner-pipeline \
-// RUN:   | mlir-vulkan-runner - --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils --entry-point-result=void | FileCheck %s
+// RUN:   | mlir-cpu-runner - --shared-libs=%mlir_vulkan_runtime,%mlir_runner_utils --entry-point-result=void | FileCheck %s
 
 // CHECK-COUNT-64: [3, 3, 3, 3, 3, 3, 3, 3]
 module attributes {
diff --git a/mlir/test/mlir-vulkan-runner/addui_extended.mlir b/mlir/test/Integration/GPU/Vulkan/addui_extended.mlir
similarity index 90%
rename from mlir/test/mlir-vulkan-runner/addui_extended.mlir
rename to mlir/test/Integration/GPU/Vulkan/addui_extended.mlir
index 0894bc301f2e3..d048b7b0290a9 100644
--- a/mlir/test/mlir-vulkan-runner/addui_extended.mlir
+++ b/mlir/test/Integration/GPU/Vulkan/addui_extended.mlir
@@ -1,14 +1,14 @@
 // Make sure that addition with carry produces expected results
 // with and without expansion to primitive add/cmp ops for WebGPU.
 
-// RUN: mlir-opt %s -test-vulkan-runner-pipeline=to-llvm \
+// RUN: mlir-opt %s -test-vulkan-runner-pipeline \
 // RUN:   | mlir-cpu-runner - \
-// RUN:     --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \
+// RUN:     --shared-libs=%mlir_vulkan_runtime,%mlir_runner_utils \
 // RUN:     --entry-point-result=void | FileCheck %s
 
-// RUN: mlir-opt %s -test-vulkan-runner-pipeline="spirv-webgpu-prepare to-llvm" \
+// RUN: mlir-opt %s -test-vulkan-runner-pipeline=spirv-webgpu-prepare \
 // RUN:   | mlir-cpu-runner - \
-// RUN:     --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \
+// RUN:     --shared-libs=%mlir_vulkan_runtime,%mlir_runner_utils \
 // RUN:     --entry-point-result=void | FileCheck %s
 
 // CHECK: [0, 42, 0, 42]
diff --git a/mlir/test/mlir-vulkan-runner/lit.local.cfg b/mlir/test/Integration/GPU/Vulkan/lit.local.cfg
similarity index 100%
rename from mlir/test/mlir-vulkan-runner/lit.local.cfg
rename to mlir/test/Integration/GPU/Vulkan/lit.local.cfg
diff --git a/mlir/test/mlir-vulkan-runner/mulf.mlir b/mlir/test/Integration/GPU/Vulkan/mulf.mlir
similarity index 94%
rename from mlir/test/mlir-vulkan-runner/mulf.mlir
rename to mlir/test/Integration/GPU/Vulkan/mulf.mlir
index 22fa034a9b455..f1f71bca73c3e 100644
--- a/mlir/test/mlir-vulkan-runner/mulf.mlir
+++ b/mlir/test/Integration/GPU/Vulkan/mulf.mlir
@@ -1,5 +1,5 @@
 // RUN: mlir-opt %s -test-vulkan-runner-pipeline \
-// RUN:   | mlir-vulkan-runner - --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils --entry-point-result=void | FileCheck %s
+// RUN:   | mlir-cpu-runner - --shared-libs=%mlir_vulkan_runtime,%mlir_runner_utils --entry-point-result=void | FileCheck %s
 
 // CHECK-COUNT-4: [6, 6, 6, 6]
 module attributes {
diff --git a/mlir/test/mlir-vulkan-runner/smul_extended.mlir b/mlir/test/Integration/GPU/Vulkan/smul_extended.mlir
similarity index 90%
rename from mlir/test/mlir-vulkan-runner/smul_extended.mlir
rename to mlir/test/Integration/GPU/Vulkan/smul_extended.mlir
index 0ef86f46562e8..ac46b9035c13c 100644
--- a/mlir/test/mlir-vulkan-runner/smul_extended.mlir
+++ b/mlir/test/Integration/GPU/Vulkan/smul_extended.mlir
@@ -1,14 +1,14 @@
 // Make sure that signed extended multiplication produces expected results
 // with and without expansion to primitive mul/add ops for WebGPU.
 
-// RUN: mlir-opt %s -test-vulkan-runner-pipeline=to-llvm \
+// RUN: mlir-opt %s -test-vulkan-runner-pipeline \
 // RUN:   | mlir-cpu-runner - \
-// RUN:     --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \
+// RUN:     --shared-libs=%mlir_vulkan_runtime,%mlir_runner_utils \
 // RUN:     --entry-point-result=void | FileCheck %s
 
-// RUN: mlir-opt %s -test-vulkan-runner-pipeline="spirv-webgpu-prepare to-llvm" \
+// RUN: mlir-opt %s -test-vulkan-runner-pipeline=spirv-webgpu-prepare \
 // RUN:   | mlir-cpu-runner - \
-// RUN:     --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \
+// RUN:     --shared-libs=%mlir_vulkan_runtime,%mlir_runner_utils \
 // RUN:     --entry-point-result=void | FileCheck %s
 
 // CHECK: [0, 1, -2,  1, 1048560, -87620295, -131071,  560969770]
diff --git a/mlir/test/mlir-vulkan-runner/subf.mlir b/mlir/test/Integration/GPU/Vulkan/subf.mlir
similarity index 94%
rename from mlir/test/mlir-vulkan-runner/subf.mlir
rename to mlir/test/Integration/GPU/Vulkan/subf.mlir
index 23496ef3abc00..50c63abc8c10a 100644
--- a/mlir/test/mlir-vulkan-runner/subf.mlir
+++ b/mlir/test/Integration/GPU/Vulkan/subf.mlir
@@ -1,5 +1,5 @@
 // RUN: mlir-opt %s -test-vulkan-runner-pipeline \
-// RUN:   | mlir-vulkan-runner - --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils --entry-point-result=void | FileCheck %s
+// RUN:   | mlir-cpu-runner - --shared-libs=%mlir_vulkan_runtime,%mlir_runner_utils --entry-point-result=void | FileCheck %s
 
 // CHECK-COUNT-32: [2.2, 2.2, 2.2, 2.2]
 module attributes {
diff --git a/mlir/test/mlir-vulkan-runner/time.mlir b/mlir/test/Integration/GPU/Vulkan/time.mlir
similarity index 92%
rename from mlir/test/mlir-vulkan-runner/time.mlir
rename to mlir/test/Integration/GPU/Vulkan/time.mlir
index f628447874238..f506f6be15df2 100644
--- a/mlir/test/mlir-vulkan-runner/time.mlir
+++ b/mlir/test/Integration/GPU/Vulkan/time.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt %s -test-vulkan-runner-pipeline=to-llvm \
-// RUN:   | mlir-cpu-runner - --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils --entry-point-result=void | FileCheck %s
+// RUN: mlir-opt %s -test-vulkan-runner-pipeline \
+// RUN:   | mlir-cpu-runner - --shared-libs=%mlir_vulkan_runtime,%mlir_runner_utils --entry-point-result=void | FileCheck %s
 
 // CHECK: Compute shader execution time
 // CHECK: Command buffer submit time
diff --git a/mlir/test/mlir-vulkan-runner/umul_extended.mlir b/mlir/test/Integration/GPU/Vulkan/umul_extended.mlir
similarity index 90%
rename from mlir/test/mlir-vulkan-runner/umul_extended.mlir
rename to mlir/test/Integration/GPU/Vulkan/umul_extended.mlir
index 5936c808435c1..0f01a88429693 100644
--- a/mlir/test/mlir-vulkan-runner/umul_extended.mlir
+++ b/mlir/test/Integration/GPU/Vulkan/umul_extended.mlir
@@ -1,14 +1,14 @@
 // Make sure that unsigned extended multiplication produces expected results
 // with and without expansion to primitive mul/add ops for WebGPU.
 
-// RUN: mlir-opt %s -test-vulkan-runner-pipeline=to-llvm \
+// RUN: mlir-opt %s -test-vulkan-runner-pipeline \
 // RUN:   | mlir-cpu-runner - \
-// RUN:     --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \
+// RUN:     --shared-libs=%mlir_vulkan_runtime,%mlir_runner_utils \
 // RUN:     --entry-point-result=void | FileCheck %s
 
-// RUN: mlir-opt %s -test-vulkan-runner-pipeline="spirv-webgpu-prepare to-llvm" \
+// RUN: mlir-opt %s -test-vulkan-runner-pipeline=spirv-webgpu-prepare \
 // RUN:   | mlir-cpu-runner - \
-// RUN:     --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \
+// RUN:     --shared-libs=%mlir_vulkan_runtime,%mlir_runner_utils \
 // RUN:     --entry-point-result=void | FileCheck %s
 
 // CHECK: [0, 1, -2,  1, 1048560, -87620295, -131071, -49]
diff --git a/mlir/test/mlir-vulkan-runner/vector-deinterleave.mlir b/mlir/test/Integration/GPU/Vulkan/vector-deinterleave.mlir
similarity index 96%
rename from mlir/test/mlir-vulkan-runner/vector-deinterleave.mlir
rename to mlir/test/Integration/GPU/Vulkan/vector-deinterleave.mlir
index ebeb19cd6bcc5..4e3f6ace15f6b 100644
--- a/mlir/test/mlir-vulkan-runner/vector-deinterleave.mlir
+++ b/mlir/test/Integration/GPU/Vulkan/vector-deinterleave.mlir
@@ -1,6 +1,6 @@
-// RUN: mlir-opt %s -test-vulkan-runner-pipeline=to-llvm \
+// RUN: mlir-opt %s -test-vulkan-runner-pipeline \
 // RUN:   | mlir-cpu-runner - \
-// RUN:     --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \
+// RUN:     --shared-libs=%mlir_vulkan_runtime,%mlir_runner_utils \
 // RUN:     --entry-point-result=void | FileCheck %s
 
 // CHECK: [0, 2]
diff --git a/mlir/test/mlir-vulkan-runner/vector-interleave.mlir b/mlir/test/Integration/GPU/Vulkan/vector-interleave.mlir
similarity index 96%
rename from mlir/test/mlir-vulkan-runner/vector-interleave.mlir
rename to mlir/test/Integration/GPU/Vulkan/vector-interleave.mlir
index 9314baf9b39c7..f7f620bf766b8 100644
--- a/mlir/test/mlir-vulkan-runner/vector-interleave.mlir
+++ b/mlir/test/Integration/GPU/Vulkan/vector-interleave.mlir
@@ -1,6 +1,6 @@
-// RUN: mlir-opt %s -test-vulkan-runner-pipeline=to-llvm \
+// RUN: mlir-opt %s -test-vulkan-runner-pipeline \
 // RUN:   | mlir-cpu-runner - \
-// RUN:     --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \
+// RUN:     --shared-libs=%mlir_vulkan_runtime,%mlir_runner_utils \
 // RUN:     --entry-point-result=void | FileCheck %s
 
 // CHECK: [0, 2, 1, 3]
diff --git a/mlir/test/mlir-vulkan-runner/vector-shuffle.mlir b/mlir/test/Integration/GPU/Vulkan/vector-shuffle.mlir
similarity index 96%
rename from mlir/test/mlir-vulkan-runner/vector-shuffle.mlir
rename to mlir/test/Integration/GPU/Vulkan/vector-shuffle.mlir
index cf3e2c569426b..0f9c883091b89 100644
--- a/mlir/test/mlir-vulkan-runner/vector-shuffle.mlir
+++ b/mlir/test/Integration/GPU/Vulkan/vector-shuffle.mlir
@@ -1,6 +1,6 @@
-// RUN: mlir-opt %s -test-vulkan-runner-pipeline=to-llvm \
+// RUN: mlir-opt %s -test-vulkan-runner-pipeline \
 // RUN:   | mlir-cpu-runner - \
-// RUN:     --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \
+// RUN:     --shared-libs=%mlir_vulkan_runtime,%mlir_runner_utils \
 // RUN:     --entry-point-result=void | FileCheck %s
 
 // CHECK: [2, 1, 3, 3]
diff --git a/mlir/test/Target/LLVMIR/openmp-wsloop-private-late-alloca-workaround.mlir b/mlir/test/Target/LLVMIR/openmp-wsloop-private-late-alloca-workaround.mlir
deleted file mode 100644
index 4d732bbb4e3b6..0000000000000
--- a/mlir/test/Target/LLVMIR/openmp-wsloop-private-late-alloca-workaround.mlir
+++ /dev/null
@@ -1,47 +0,0 @@
-// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-
-// Tests the "impure" alloc region workaround until `omp.private` is updated.
-// See
-// https://discourse.llvm.org/t/rfc-openmp-supporting-delayed-task-execution-with-firstprivate-variables/83084/7
-// and https://discourse.llvm.org/t/delayed-privatization-for-omp-wsloop/83989
-// for more info.
-
-omp.private {type = private} @impure_alloca_privatizer : !llvm.ptr alloc {
-^bb0(%arg0: !llvm.ptr):
-  %0 = llvm.mlir.constant(1 : i64) : i64
-  %1 = llvm.alloca %0 x i32 {bindc_name = "i", pinned} : (i64) -> !llvm.ptr
-  %3 = llvm.getelementptr %arg0[0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr)>
-  omp.yield(%1 : !llvm.ptr)
-}
-
-llvm.func @test_alloca_ip_workaround() {
-  omp.target {
-    %65 = llvm.mlir.constant(1 : i32) : i32
-    %66 = llvm.alloca %65 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
-    %67 = llvm.mlir.constant(0 : index) : i64
-    %68 = llvm.mlir.constant(10 : i32) : i32
-    %69 = llvm.mlir.constant(1 : i32) : i32
-    omp.wsloop private(@impure_alloca_privatizer %66 -> %arg6 : !llvm.ptr) {
-      omp.loop_nest (%arg8) : i32 = (%69) to (%68) inclusive step (%69) {
-        omp.yield
-      }
-    }
-    omp.terminator
-  }
-  llvm.return
-}
-
-// CHECK-LABEL: define {{.*}} @__omp_offloading_{{.*}}_test_alloca_ip_workaround
-
-// CHECK:       omp.target:
-// CHECK:         %[[ALLOC_REG_ARG:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8
-// CHECK:         br label %omp.private.latealloc
-
-// CHECK:       omp.private.latealloc:
-// CHECK:         %[[PRIV_ALLOC:.*]] = alloca i32, i64 1, align 4
-// The usage of `ALLOC_REG_ARG` in the inlined alloc region is the reason for
-// introducing the workaround.
-// CHECK:         %{{.*}} = getelementptr { ptr }, ptr %[[ALLOC_REG_ARG]], i32 0
-// CHECK:         br label %omp.region.after_defining_block
-
-
diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir
index 0620c23b5fdad..8879ba02b2405 100644
--- a/mlir/test/Target/LLVMIR/rocdl.mlir
+++ b/mlir/test/Target/LLVMIR/rocdl.mlir
@@ -219,7 +219,9 @@ llvm.func @rocdl.xdlops(%arg0 : f32, %arg1 : f32,
                    %arg4 : vector<16 x f32>, %arg5 : vector<4xf32>,
                    %arg6 : vector<4xf16>, %arg7 : vector<32 x i32>,
                    %arg8 : vector<16 x i32>, %arg9 : vector<4xi32>,
-                   %arg10 : vector<2xi16>, %arg11 : i64) -> vector<32 x f32> {
+                   %arg10 : vector<2xi16>, %arg11 : i64,
+                   %arg12 : vector<8xbf16>, %arg13 : vector<4xi32>,
+                   %arg14 : vector<8xf16>) -> vector<32 x f32> {
   %csti32 = llvm.mlir.constant(42 : i32) : i32
 
   // CHECK-LABEL: rocdl.xdlops
@@ -362,6 +364,37 @@ llvm.func @rocdl.xdlops(%arg0 : f32, %arg1 : f32,
   %r27 = rocdl.mfma.f32.32x32x16.bf8.bf8 %arg11, %arg11, %arg4, %csti32, %csti32, %csti32 :
                             (i64, i64, vector<16xf32>,
                             i32, i32, i32) -> vector<16xf32>
+
+  // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <4 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+  %r28 = rocdl.mfma.f32.16x16x32.bf16 %arg12, %arg12, %arg5, %csti32, %csti32, %csti32 :
+                              (vector<8xbf16>, vector<8xbf16>, vector<4xf32>,
+                               i32, i32, i32) -> vector<4xf32>
+
+  // CHECK: call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x64.i8(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+  %r29 = rocdl.mfma.i32.16x16x64.i8 %arg9, %arg9, %arg9, %csti32, %csti32, %csti32 :
+                              (vector<4xi32>, vector<4xi32>, vector<4xi32>,
+                               i32, i32, i32) -> vector<4xi32>
+
+  // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.f16(<8 x half> %{{.*}}, <8 x half> %{{.*}}, <4 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+  %r30 = rocdl.mfma.f32.16x16x32.f16 %arg14, %arg14, %arg5, %csti32, %csti32, %csti32 :
+                               (vector<8xf16>, vector<8xf16>, vector<4xf32>,
+                                i32, i32, i32) -> vector<4xf32>
+
+  // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <16 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+  %r31 = rocdl.mfma.f32.32x32x16.bf16 %arg12, %arg12, %arg4, %csti32, %csti32, %csti32 :
+                               (vector<8xbf16>, vector<8xbf16>, vector<16xf32>,
+                                i32, i32, i32) -> vector<16xf32>
+
+  // CHECK: call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x32.i8(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <16 x i32> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+  %r32 = rocdl.mfma.i32.32x32x32.i8 %arg9, %arg9, %arg8, %csti32, %csti32, %csti32 :
+                               (vector<4xi32>, vector<4xi32>, vector<16xi32>,
+                                i32, i32, i32) -> vector<16xi32>
+
+  // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.f16(<8 x half> %{{.*}}, <8 x half> %{{.*}}, <16 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+  %r33 = rocdl.mfma.f32.32x32x16.f16 %arg14, %arg14, %arg4, %csti32, %csti32, %csti32 :
+                               (vector<8xf16>, vector<8xf16>, vector<16xf32>,
+                                i32, i32, i32) -> vector<16xf32>
+
   llvm.return %r0 : vector<32 x f32>
 }
 
@@ -424,6 +457,30 @@ llvm.func @rocdl.wmma(%arg0 : vector<8xf32>, %arg1 : vector<16 x f16>, %arg2 : v
   llvm.return %r0 : vector<8xf32>
 }
 
+llvm.func @rocdl.ds.read.tr(%ptr : !llvm.ptr<3>) -> vector<4xf16> {
+  // CHECK-LABEL: rocdl.ds.read.tr
+  // CHECK: call <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3) %0)
+  %r0 = rocdl.ds.read.tr4.b64 %ptr : !llvm.ptr<3> -> vector<2xi32>
+  // CHECK: call <3 x i32> @llvm.amdgcn.ds.read.tr6.b96.v3i32(ptr addrspace(3) %0)
+  %r1 = rocdl.ds.read.tr6.b96 %ptr : !llvm.ptr<3> -> vector<3xi32>
+  // CHECK: call <2 x i32> @llvm.amdgcn.ds.read.tr8.b64.v2i32(ptr addrspace(3) %0)
+  %r2 = rocdl.ds.read.tr8.b64 %ptr : !llvm.ptr<3> -> vector<2xi32>
+  // CHECK: call <4 x half> @llvm.amdgcn.ds.read.tr16.b64.v4f16(ptr addrspace(3) %0)
+  %r3 = rocdl.ds.read.tr16.b64 %ptr : !llvm.ptr<3> -> vector<4xf16>
+  // CHECK: call <4 x bfloat> @llvm.amdgcn.ds.read.tr16.b64.v4bf16(ptr addrspace(3) %0)
+  %r4 = rocdl.ds.read.tr16.b64 %ptr : !llvm.ptr<3> -> vector<4xbf16>
+  llvm.return %r3 : vector<4xf16>
+}
+
+llvm.func @rocdl.global.load.lds(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
+  %aux = llvm.mlir.constant(0 : i32) : i32
+  %offset = llvm.mlir.constant(0 : i32) : i32
+  %size = llvm.mlir.constant(10 : i32) : i32
+  // CHECK: call void @llvm.amdgcn.global.load.lds
+  rocdl.global.load.lds %src, %dst, %size, %offset, %aux
+  llvm.return
+}
+
 llvm.func @rocdl.make.buffer.rsrc(%ptr : !llvm.ptr,
                                   %stride : i16,
                                   %numRecords : i32,
diff --git a/mlir/test/lib/Analysis/CMakeLists.txt b/mlir/test/lib/Analysis/CMakeLists.txt
index 7c6b31ae8b73e..91879981bffd2 100644
--- a/mlir/test/lib/Analysis/CMakeLists.txt
+++ b/mlir/test/lib/Analysis/CMakeLists.txt
@@ -21,12 +21,14 @@ add_mlir_library(MLIRTestAnalysis
   EXCLUDE_FROM_LIBMLIR
 
   LINK_LIBS PUBLIC
+  MLIRTestDialect
+  )
+mlir_target_link_libraries(MLIRTestAnalysis PUBLIC
   MLIRAffineDialect
   MLIRAnalysis
   MLIRFunctionInterfaces
   MLIRMemRefDialect
   MLIRPass
-  MLIRTestDialect
   )
 
 target_include_directories(MLIRTestAnalysis
diff --git a/mlir/test/lib/Conversion/ConvertToSPIRV/CMakeLists.txt b/mlir/test/lib/Conversion/ConvertToSPIRV/CMakeLists.txt
index aeade52c7ade5..fd557e0c989f5 100644
--- a/mlir/test/lib/Conversion/ConvertToSPIRV/CMakeLists.txt
+++ b/mlir/test/lib/Conversion/ConvertToSPIRV/CMakeLists.txt
@@ -4,8 +4,8 @@ add_mlir_library(MLIRTestConvertToSPIRV
   TestSPIRVVectorUnrolling.cpp
 
   EXCLUDE_FROM_LIBMLIR
-
-  LINK_LIBS PUBLIC
+  )
+mlir_target_link_libraries(MLIRTestConvertToSPIRV PUBLIC
   MLIRArithDialect
   MLIRFuncDialect
   MLIRPass
diff --git a/mlir/test/lib/Conversion/FuncToLLVM/CMakeLists.txt b/mlir/test/lib/Conversion/FuncToLLVM/CMakeLists.txt
index d3dbc94a99bc7..f190b2da5a190 100644
--- a/mlir/test/lib/Conversion/FuncToLLVM/CMakeLists.txt
+++ b/mlir/test/lib/Conversion/FuncToLLVM/CMakeLists.txt
@@ -6,12 +6,14 @@ add_mlir_library(MLIRTestFuncToLLVM
   EXCLUDE_FROM_LIBMLIR
 
   LINK_LIBS PUBLIC
+  MLIRTestDialect
+  )
+mlir_target_link_libraries(MLIRTestFuncToLLVM PUBLIC
   MLIRFuncToLLVM
   MLIRLLVMCommonConversion
   MLIRLLVMDialect
   MLIRLLVMIRTransforms
   MLIRPass
-  MLIRTestDialect
   )
 
 target_include_directories(MLIRTestFuncToLLVM
diff --git a/mlir/test/lib/Conversion/MathToVCIX/CMakeLists.txt b/mlir/test/lib/Conversion/MathToVCIX/CMakeLists.txt
index 933e84722fbba..3726993b59692 100644
--- a/mlir/test/lib/Conversion/MathToVCIX/CMakeLists.txt
+++ b/mlir/test/lib/Conversion/MathToVCIX/CMakeLists.txt
@@ -3,8 +3,8 @@ add_mlir_library(MLIRTestMathToVCIX
   TestMathToVCIXConversion.cpp
 
   EXCLUDE_FROM_LIBMLIR
-
-  LINK_LIBS PUBLIC
+)
+mlir_target_link_libraries(MLIRTestMathToVCIX PUBLIC
   MLIRArithDialect
   MLIRFuncDialect
   MLIRMathDialect
diff --git a/mlir/test/lib/Conversion/OneToNTypeConversion/CMakeLists.txt b/mlir/test/lib/Conversion/OneToNTypeConversion/CMakeLists.txt
index b72302202f72b..f0ea87946a983 100644
--- a/mlir/test/lib/Conversion/OneToNTypeConversion/CMakeLists.txt
+++ b/mlir/test/lib/Conversion/OneToNTypeConversion/CMakeLists.txt
@@ -4,13 +4,15 @@ add_mlir_library(MLIRTestOneToNTypeConversionPass
   EXCLUDE_FROM_LIBMLIR
 
   LINK_LIBS PUBLIC
+  MLIRTestDialect
+ )
+mlir_target_link_libraries(MLIRTestOneToNTypeConversionPass PUBLIC
   MLIRFuncDialect
   MLIRFuncTransforms
   MLIRIR
   MLIRPass
   MLIRSCFDialect
   MLIRSCFTransforms
-  MLIRTestDialect
   MLIRTransformUtils
  )
 
diff --git a/mlir/test/lib/Conversion/VectorToSPIRV/CMakeLists.txt b/mlir/test/lib/Conversion/VectorToSPIRV/CMakeLists.txt
index 09ed283ac97bc..cb5ecc933ad82 100644
--- a/mlir/test/lib/Conversion/VectorToSPIRV/CMakeLists.txt
+++ b/mlir/test/lib/Conversion/VectorToSPIRV/CMakeLists.txt
@@ -3,8 +3,8 @@ add_mlir_library(MLIRTestVectorToSPIRV
   TestVectorReductionToSPIRVDotProd.cpp
 
   EXCLUDE_FROM_LIBMLIR
-
-  LINK_LIBS PUBLIC
+  )
+mlir_target_link_libraries(MLIRTestVectorToSPIRV PUBLIC
   MLIRVectorToSPIRV
   MLIRArithDialect
   MLIRFuncDialect
diff --git a/mlir/test/lib/Dialect/Affine/CMakeLists.txt b/mlir/test/lib/Dialect/Affine/CMakeLists.txt
index 33cefab9fa2ed..885bc9b4c3afc 100644
--- a/mlir/test/lib/Dialect/Affine/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/Affine/CMakeLists.txt
@@ -21,6 +21,9 @@ add_mlir_library(MLIRAffineTransformsTestPasses
   Core
 
   LINK_LIBS PUBLIC
+  MLIRTestDialect
+  )
+mlir_target_link_libraries(MLIRAffineTransformsTestPasses PUBLIC
   MLIRArithTransforms
   MLIRAffineAnalysis
   MLIRAffineTransforms
@@ -30,7 +33,6 @@ add_mlir_library(MLIRAffineTransformsTestPasses
   MLIRSupport
   MLIRMemRefDialect
   MLIRTensorDialect
-  MLIRTestDialect
   MLIRVectorUtils
   )
 
diff --git a/mlir/test/lib/Dialect/Arith/CMakeLists.txt b/mlir/test/lib/Dialect/Arith/CMakeLists.txt
index 33615ab21a177..0c05ed5ce6210 100644
--- a/mlir/test/lib/Dialect/Arith/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/Arith/CMakeLists.txt
@@ -3,8 +3,8 @@ add_mlir_library(MLIRArithTestPasses
   TestEmulateWideInt.cpp
 
   EXCLUDE_FROM_LIBMLIR
-
-  LINK_LIBS PUBLIC
+)
+mlir_target_link_libraries(MLIRArithTestPasses PUBLIC
   MLIRArithDialect
   MLIRArithTransforms
   MLIRFuncDialect
diff --git a/mlir/test/lib/Dialect/ArmNeon/CMakeLists.txt b/mlir/test/lib/Dialect/ArmNeon/CMakeLists.txt
index 21548ca57701f..460842d238533 100644
--- a/mlir/test/lib/Dialect/ArmNeon/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/ArmNeon/CMakeLists.txt
@@ -3,8 +3,8 @@ add_mlir_library(MLIRArmNeonTestPasses
   TestLowerToArmNeon.cpp
 
   EXCLUDE_FROM_LIBMLIR
-
-  LINK_LIBS PUBLIC
+  )
+mlir_target_link_libraries(MLIRArmNeonTestPasses PUBLIC
   MLIRArmNeonDialect
   MLIRArmNeonTransforms
   MLIRIR
diff --git a/mlir/test/lib/Dialect/ArmSME/CMakeLists.txt b/mlir/test/lib/Dialect/ArmSME/CMakeLists.txt
index cdd8afe141421..9ee93dec65943 100644
--- a/mlir/test/lib/Dialect/ArmSME/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/ArmSME/CMakeLists.txt
@@ -3,8 +3,8 @@ add_mlir_library(MLIRArmSMETestPasses
   TestLowerToArmSME.cpp
 
   EXCLUDE_FROM_LIBMLIR
-
-  LINK_LIBS PUBLIC
+  )
+mlir_target_link_libraries(MLIRArmSMETestPasses PUBLIC
   MLIRArithToArmSME
   MLIRArmSMEToLLVM
   MLIRArmSMEToSCF
diff --git a/mlir/test/lib/Dialect/Bufferization/CMakeLists.txt b/mlir/test/lib/Dialect/Bufferization/CMakeLists.txt
index a183d02cefed7..c14a9f2cc9bb0 100644
--- a/mlir/test/lib/Dialect/Bufferization/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/Bufferization/CMakeLists.txt
@@ -3,8 +3,8 @@ add_mlir_library(MLIRBufferizationTestPasses
   TestTensorCopyInsertion.cpp
 
   EXCLUDE_FROM_LIBMLIR
-
-  LINK_LIBS PUBLIC
+)
+mlir_target_link_libraries(MLIRBufferizationTestPasses PUBLIC
   MLIRBufferizationDialect
   MLIRBufferizationTransforms
   MLIRIR
diff --git a/mlir/test/lib/Dialect/ControlFlow/CMakeLists.txt b/mlir/test/lib/Dialect/ControlFlow/CMakeLists.txt
index 39d9555c7405e..03ae3c749f991 100644
--- a/mlir/test/lib/Dialect/ControlFlow/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/ControlFlow/CMakeLists.txt
@@ -3,8 +3,8 @@ add_mlir_library(MLIRControlFlowTestPasses
   TestAssert.cpp
 
   EXCLUDE_FROM_LIBMLIR
-
-  LINK_LIBS PUBLIC
+)
+mlir_target_link_libraries(MLIRControlFlowTestPasses PUBLIC
   MLIRControlFlowToLLVM
   MLIRFuncDialect
   MLIRLLVMCommonConversion
diff --git a/mlir/test/lib/Dialect/DLTI/CMakeLists.txt b/mlir/test/lib/Dialect/DLTI/CMakeLists.txt
index fddc3e342e0ac..98cf47c78866f 100644
--- a/mlir/test/lib/Dialect/DLTI/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/DLTI/CMakeLists.txt
@@ -5,9 +5,11 @@ add_mlir_library(MLIRDLTITestPasses
   EXCLUDE_FROM_LIBMLIR
 
   LINK_LIBS PUBLIC
+  MLIRTestDialect
+  )
+mlir_target_link_libraries(MLIRDLTITestPasses PUBLIC
   MLIRDLTIDialect
   MLIRPass
-  MLIRTestDialect
   )
 
 target_include_directories(MLIRDLTITestPasses
diff --git a/mlir/test/lib/Dialect/Func/CMakeLists.txt b/mlir/test/lib/Dialect/Func/CMakeLists.txt
index 6adf893e44f05..1fbe1f59e2fe5 100644
--- a/mlir/test/lib/Dialect/Func/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/Func/CMakeLists.txt
@@ -5,10 +5,12 @@ add_mlir_library(MLIRFuncTestPasses
   EXCLUDE_FROM_LIBMLIR
 
   LINK_LIBS PUBLIC
+  MLIRTestDialect
+  )
+mlir_target_link_libraries(MLIRFuncTestPasses PUBLIC
   MLIRAffineDialect
   MLIRPass
   MLIRFuncTransforms
-  MLIRTestDialect
   MLIRTransformUtils
   )
 
diff --git a/mlir/test/lib/Dialect/GPU/CMakeLists.txt b/mlir/test/lib/Dialect/GPU/CMakeLists.txt
index 48cbc4ad5505b..4ca5974ed5a49 100644
--- a/mlir/test/lib/Dialect/GPU/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/GPU/CMakeLists.txt
@@ -36,8 +36,8 @@ add_mlir_library(MLIRGPUTestPasses
   TestGpuRewrite.cpp
 
   EXCLUDE_FROM_LIBMLIR
-
-  LINK_LIBS PUBLIC
+  )
+mlir_target_link_libraries(MLIRGPUTestPasses PUBLIC
   ${LIBS}
   )
 
diff --git a/mlir/test/lib/Dialect/LLVM/CMakeLists.txt b/mlir/test/lib/Dialect/LLVM/CMakeLists.txt
index 6a2f0ba2756d4..616f00a94a56e 100644
--- a/mlir/test/lib/Dialect/LLVM/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/LLVM/CMakeLists.txt
@@ -4,8 +4,8 @@ add_mlir_library(MLIRLLVMTestPasses
   TestPatterns.cpp
 
   EXCLUDE_FROM_LIBMLIR
-
-  LINK_LIBS PUBLIC
+  )
+mlir_target_link_libraries(MLIRLLVMTestPasses PUBLIC
   MLIRAffineToStandard
   MLIRFuncDialect
   MLIRFuncToLLVM
diff --git a/mlir/test/lib/Dialect/Linalg/CMakeLists.txt b/mlir/test/lib/Dialect/Linalg/CMakeLists.txt
index 283e426b4e594..eb6f581252181 100644
--- a/mlir/test/lib/Dialect/Linalg/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/Linalg/CMakeLists.txt
@@ -10,8 +10,8 @@ add_mlir_library(MLIRLinalgTestPasses
   TestPadFusion.cpp
 
   EXCLUDE_FROM_LIBMLIR
-
-  LINK_LIBS PUBLIC
+  )
+mlir_target_link_libraries(MLIRLinalgTestPasses PUBLIC
   MLIRAffineDialect
   MLIRArithDialect
   MLIRArithTransforms
diff --git a/mlir/test/lib/Dialect/Math/CMakeLists.txt b/mlir/test/lib/Dialect/Math/CMakeLists.txt
index 195d392d6ad34..91e70d1785369 100644
--- a/mlir/test/lib/Dialect/Math/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/Math/CMakeLists.txt
@@ -5,8 +5,8 @@ add_mlir_library(MLIRMathTestPasses
   TestPolynomialApproximation.cpp
 
   EXCLUDE_FROM_LIBMLIR
-
-  LINK_LIBS PUBLIC
+  )
+mlir_target_link_libraries(MLIRMathTestPasses PUBLIC
   MLIRMathTransforms
   MLIRPass
   MLIRTransformUtils
diff --git a/mlir/test/lib/Dialect/MemRef/CMakeLists.txt b/mlir/test/lib/Dialect/MemRef/CMakeLists.txt
index 0498de3eb9317..39457ab2d0bf7 100644
--- a/mlir/test/lib/Dialect/MemRef/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/MemRef/CMakeLists.txt
@@ -7,10 +7,12 @@ add_mlir_library(MLIRMemRefTestPasses
   EXCLUDE_FROM_LIBMLIR
 
   LINK_LIBS PUBLIC
+  MLIRTestDialect
+  )
+mlir_target_link_libraries(MLIRMemRefTestPasses PUBLIC
   MLIRPass
   MLIRMemRefDialect
   MLIRMemRefTransforms
-  MLIRTestDialect
   )
 
 target_include_directories(MLIRMemRefTestPasses
diff --git a/mlir/test/lib/Dialect/Mesh/CMakeLists.txt b/mlir/test/lib/Dialect/Mesh/CMakeLists.txt
index 07e9bb6f9f238..7bd0493d11a7e 100644
--- a/mlir/test/lib/Dialect/Mesh/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/Mesh/CMakeLists.txt
@@ -5,8 +5,8 @@ add_mlir_library(MLIRMeshTest
   TestSimplifications.cpp
 
   EXCLUDE_FROM_LIBMLIR
-
-  LINK_LIBS PUBLIC
+  )
+mlir_target_link_libraries(MLIRMeshTest PUBLIC
   MLIRMeshDialect
   MLIRMeshTransforms
   MLIRPass
diff --git a/mlir/test/lib/Dialect/NVGPU/CMakeLists.txt b/mlir/test/lib/Dialect/NVGPU/CMakeLists.txt
index 6b333bdb3c08c..025f1a0dbc488 100644
--- a/mlir/test/lib/Dialect/NVGPU/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/NVGPU/CMakeLists.txt
@@ -3,8 +3,8 @@ add_mlir_library(MLIRNVGPUTestPasses
   TestNVGPUTransforms.cpp
 
   EXCLUDE_FROM_LIBMLIR
-
-  LINK_LIBS PUBLIC
+  )
+mlir_target_link_libraries(MLIRNVGPUTestPasses PUBLIC
   MLIRIR
   MLIRAffineDialect
   MLIRAnalysis
diff --git a/mlir/test/lib/Dialect/SCF/CMakeLists.txt b/mlir/test/lib/Dialect/SCF/CMakeLists.txt
index 792430cc84b65..791c2e681415a 100644
--- a/mlir/test/lib/Dialect/SCF/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/SCF/CMakeLists.txt
@@ -8,8 +8,8 @@ add_mlir_library(MLIRSCFTestPasses
   TestWhileOpBuilder.cpp
 
   EXCLUDE_FROM_LIBMLIR
-
-  LINK_LIBS PUBLIC
+  )
+mlir_target_link_libraries(MLIRSCFTestPasses PUBLIC
   MLIRMemRefDialect
   MLIRPass
   MLIRSCFDialect
diff --git a/mlir/test/lib/Dialect/SPIRV/CMakeLists.txt b/mlir/test/lib/Dialect/SPIRV/CMakeLists.txt
index 11760da0005cc..0e2c1344782d9 100644
--- a/mlir/test/lib/Dialect/SPIRV/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/SPIRV/CMakeLists.txt
@@ -9,8 +9,8 @@ add_mlir_library(MLIRSPIRVTestPasses
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/SPIRV
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/IR
-
-  LINK_LIBS PUBLIC
+  )
+mlir_target_link_libraries(MLIRSPIRVTestPasses PUBLIC
   MLIRGPUDialect
   MLIRIR
   MLIRPass
diff --git a/mlir/test/lib/Dialect/Shape/CMakeLists.txt b/mlir/test/lib/Dialect/Shape/CMakeLists.txt
index 545f13db25a84..90c8c6cb223b8 100644
--- a/mlir/test/lib/Dialect/Shape/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/Shape/CMakeLists.txt
@@ -8,8 +8,8 @@ add_mlir_library(MLIRShapeTestPasses
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Shape
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/IR
-
-  LINK_LIBS PUBLIC
+  )
+mlir_target_link_libraries(MLIRShapeTestPasses PUBLIC
   MLIRIR
   MLIRPass
   MLIRShapeOpsTransforms
diff --git a/mlir/test/lib/Dialect/Tensor/CMakeLists.txt b/mlir/test/lib/Dialect/Tensor/CMakeLists.txt
index f22b21d173296..28eae8ffb670f 100644
--- a/mlir/test/lib/Dialect/Tensor/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/Tensor/CMakeLists.txt
@@ -3,8 +3,8 @@ add_mlir_library(MLIRTensorTestPasses
   TestTensorTransforms.cpp
 
   EXCLUDE_FROM_LIBMLIR
-
-  LINK_LIBS PUBLIC
+  )
+mlir_target_link_libraries(MLIRTensorTestPasses PUBLIC
   MLIRArithDialect
   MLIRLinalgDialect
   MLIRPass
diff --git a/mlir/test/lib/Dialect/Test/CMakeLists.txt b/mlir/test/lib/Dialect/Test/CMakeLists.txt
index 967101242e26b..618b13da9899f 100644
--- a/mlir/test/lib/Dialect/Test/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/Test/CMakeLists.txt
@@ -68,8 +68,8 @@ add_mlir_library(MLIRTestDialect
   MLIRTestOpsIncGen
   MLIRTestOpsSyntaxIncGen
   MLIRTestOpsShardGen
-
-  LINK_LIBS PUBLIC
+  )
+mlir_target_link_libraries(MLIRTestDialect PUBLIC
   MLIRControlFlowInterfaces
   MLIRDataLayoutInterfaces
   MLIRDerivedAttributeOpInterface
diff --git a/mlir/test/lib/Dialect/TestDyn/CMakeLists.txt b/mlir/test/lib/Dialect/TestDyn/CMakeLists.txt
index 13eb9040b0744..2c6854efee530 100644
--- a/mlir/test/lib/Dialect/TestDyn/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/TestDyn/CMakeLists.txt
@@ -2,7 +2,7 @@ add_mlir_dialect_library(MLIRTestDynDialect
   TestDynDialect.cpp
 
   EXCLUDE_FROM_LIBMLIR
-
-  LINK_LIBS PUBLIC
+)
+mlir_target_link_libraries(MLIRTestDynDialect PUBLIC
   MLIRIR
 )
diff --git a/mlir/test/lib/Dialect/Tosa/CMakeLists.txt b/mlir/test/lib/Dialect/Tosa/CMakeLists.txt
index e5dd35d6dbcb7..7d40881ee6ee4 100644
--- a/mlir/test/lib/Dialect/Tosa/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/Tosa/CMakeLists.txt
@@ -8,8 +8,8 @@ add_mlir_dialect_library(MLIRTosaTestPasses
 
   DEPENDS
   MLIRTosaPassIncGen
-
-  LINK_LIBS PUBLIC
+  )
+mlir_target_link_libraries(MLIRTosaTestPasses PUBLIC
   MLIRFuncDialect
   MLIRPass
   MLIRTosaDialect
diff --git a/mlir/test/lib/Dialect/Transform/CMakeLists.txt b/mlir/test/lib/Dialect/Transform/CMakeLists.txt
index ca141d2778ee2..512a0a89bfea9 100644
--- a/mlir/test/lib/Dialect/Transform/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/Transform/CMakeLists.txt
@@ -15,8 +15,8 @@ add_mlir_library(MLIRTestTransformDialect
 
   DEPENDS
   MLIRTestTransformDialectExtensionIncGen
-
-  LINK_LIBS PUBLIC
+)
+mlir_target_link_libraries(MLIRTestTransformDialect PUBLIC
   MLIRIR
   MLIRPass
   MLIRPDLDialect
diff --git a/mlir/test/lib/Dialect/Vector/CMakeLists.txt b/mlir/test/lib/Dialect/Vector/CMakeLists.txt
index d8e6b24b0c26f..e16937029ac0e 100644
--- a/mlir/test/lib/Dialect/Vector/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/Vector/CMakeLists.txt
@@ -3,8 +3,8 @@ add_mlir_library(MLIRVectorTestPasses
   TestVectorTransforms.cpp
 
   EXCLUDE_FROM_LIBMLIR
-
-  LINK_LIBS PUBLIC
+  )
+mlir_target_link_libraries(MLIRVectorTestPasses PUBLIC
   MLIRAffineDialect
   MLIRAnalysis
   MLIRArithDialect
diff --git a/mlir/test/lib/IR/CMakeLists.txt b/mlir/test/lib/IR/CMakeLists.txt
index 9fe2ba0c610ef..824c5a96760aa 100644
--- a/mlir/test/lib/IR/CMakeLists.txt
+++ b/mlir/test/lib/IR/CMakeLists.txt
@@ -28,8 +28,8 @@ add_mlir_library(MLIRTestIR
   TestVisitorsGeneric.cpp
 
   EXCLUDE_FROM_LIBMLIR
-
-  LINK_LIBS PUBLIC
+  )
+mlir_target_link_libraries(MLIRTestIR PUBLIC
   MLIRPass
   MLIRBytecodeReader
   MLIRBytecodeWriter
diff --git a/mlir/test/lib/Interfaces/LoopLikeInterface/CMakeLists.txt b/mlir/test/lib/Interfaces/LoopLikeInterface/CMakeLists.txt
index f20219e00cb86..cb1e7adb412c5 100644
--- a/mlir/test/lib/Interfaces/LoopLikeInterface/CMakeLists.txt
+++ b/mlir/test/lib/Interfaces/LoopLikeInterface/CMakeLists.txt
@@ -2,8 +2,8 @@ add_mlir_library(MLIRLoopLikeInterfaceTestPasses
   TestBlockInLoop.cpp
 
   EXCLUDE_FROM_LIBMLIR
-
-  LINK_LIBS PUBLIC
+  )
+mlir_target_link_libraries(MLIRLoopLikeInterfaceTestPasses PUBLIC
   MLIRPass
   MLIRLoopLikeInterface
   MLIRFuncDialect
diff --git a/mlir/test/lib/Interfaces/TilingInterface/CMakeLists.txt b/mlir/test/lib/Interfaces/TilingInterface/CMakeLists.txt
index c2431dd15f1d5..657fcb76815bc 100644
--- a/mlir/test/lib/Interfaces/TilingInterface/CMakeLists.txt
+++ b/mlir/test/lib/Interfaces/TilingInterface/CMakeLists.txt
@@ -10,8 +10,8 @@ add_mlir_library(MLIRTilingInterfaceTestPasses
   MLIRTestTilingInterfaceTransformOpsIncGen
 
   EXCLUDE_FROM_LIBMLIR
-
-  LINK_LIBS PUBLIC
+  )
+mlir_target_link_libraries(MLIRTilingInterfaceTestPasses PUBLIC
   MLIRAffineDialect
   MLIRArithDialect
   MLIRIndexDialect
diff --git a/mlir/test/lib/Pass/CMakeLists.txt b/mlir/test/lib/Pass/CMakeLists.txt
index b474c1863f335..6698af86b8ae6 100644
--- a/mlir/test/lib/Pass/CMakeLists.txt
+++ b/mlir/test/lib/Pass/CMakeLists.txt
@@ -10,8 +10,8 @@ add_mlir_library(MLIRTestPass
 
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Pass
-
-  LINK_LIBS PUBLIC
+  )
+mlir_target_link_libraries(MLIRTestPass PUBLIC
   ${conversion_libs}
   MLIRIR
   MLIRPass
diff --git a/mlir/test/lib/Pass/TestVulkanRunnerPipeline.cpp b/mlir/test/lib/Pass/TestVulkanRunnerPipeline.cpp
index a3624eb31e26e..e4cbbeb1f99bc 100644
--- a/mlir/test/lib/Pass/TestVulkanRunnerPipeline.cpp
+++ b/mlir/test/lib/Pass/TestVulkanRunnerPipeline.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// Implements a pipeline for use by mlir-vulkan-runner tests.
+// Implements a pipeline for use by Vulkan runner tests.
 //
 //===----------------------------------------------------------------------===//
 
@@ -33,9 +33,6 @@ struct VulkanRunnerPipelineOptions
   Option<bool> spirvWebGPUPrepare{
       *this, "spirv-webgpu-prepare",
       llvm::cl::desc("Run MLIR transforms used when targetting WebGPU")};
-  Option<bool> toLlvm{*this, "to-llvm",
-                      llvm::cl::desc("Run MLIR transforms to lower host code "
-                                     "to LLVM, intended for mlir-cpu-runner")};
 };
 
 void buildTestVulkanRunnerPipeline(OpPassManager &passManager,
@@ -64,18 +61,14 @@ void buildTestVulkanRunnerPipeline(OpPassManager &passManager,
 
   passManager.addPass(createGpuModuleToBinaryPass());
 
-  if (options.toLlvm) {
-    passManager.addPass(createFinalizeMemRefToLLVMConversionPass());
-    passManager.nest<func::FuncOp>().addPass(
-        LLVM::createRequestCWrappersPass());
-    // vulkan-runtime-wrappers.cpp uses the non-bare-pointer calling convention,
-    // and the type check is needed to prevent accidental ABI mismatches.
-    GpuToLLVMConversionPassOptions opt;
-    opt.hostBarePtrCallConv = false;
-    opt.kernelBarePtrCallConv = false;
-    opt.typeCheckKernelArgs = true;
-    passManager.addPass(createGpuToLLVMConversionPass(opt));
-  }
+  passManager.addPass(createFinalizeMemRefToLLVMConversionPass());
+  passManager.nest<func::FuncOp>().addPass(LLVM::createRequestCWrappersPass());
+  // VulkanRuntimeWrappers.cpp requires these calling convention options.
+  GpuToLLVMConversionPassOptions opt;
+  opt.hostBarePtrCallConv = false;
+  opt.kernelBarePtrCallConv = true;
+  opt.kernelIntersperseSizeCallConv = true;
+  passManager.addPass(createGpuToLLVMConversionPass(opt));
 }
 
 } // namespace
@@ -84,8 +77,9 @@ namespace mlir::test {
 void registerTestVulkanRunnerPipeline() {
   PassPipelineRegistration<VulkanRunnerPipelineOptions>(
       "test-vulkan-runner-pipeline",
-      "Runs a series of passes for lowering GPU-dialect MLIR to "
-      "SPIR-V-dialect MLIR intended for mlir-vulkan-runner or mlir-cpu-runner.",
+      "Runs a series of passes intended for Vulkan runner tests. Lowers GPU "
+      "dialect to LLVM dialect for the host and to serialized Vulkan SPIR-V "
+      "for the device.",
       buildTestVulkanRunnerPipeline);
 }
 } // namespace mlir::test
diff --git a/mlir/test/lib/Reducer/CMakeLists.txt b/mlir/test/lib/Reducer/CMakeLists.txt
index 61743b6097d61..1ea33bdd0a5b9 100644
--- a/mlir/test/lib/Reducer/CMakeLists.txt
+++ b/mlir/test/lib/Reducer/CMakeLists.txt
@@ -9,8 +9,8 @@ add_mlir_library(MLIRTestReducer
 
   LINK_COMPONENTS
   Core
-
-  LINK_LIBS PUBLIC
+  )
+mlir_target_link_libraries(MLIRTestReducer PUBLIC
   MLIRIR
   MLIRPass
   MLIRSupport
diff --git a/mlir/test/lib/Rewrite/CMakeLists.txt b/mlir/test/lib/Rewrite/CMakeLists.txt
index f8926aa8e4870..75702ca89333a 100644
--- a/mlir/test/lib/Rewrite/CMakeLists.txt
+++ b/mlir/test/lib/Rewrite/CMakeLists.txt
@@ -7,8 +7,8 @@ if (MLIR_ENABLE_PDL_IN_PATTERNMATCH)
 
     ADDITIONAL_HEADER_DIRS
     ${MLIR_MAIN_INCLUDE_DIR}/mlir/Rewrite
-
-    LINK_LIBS PUBLIC
+    )
+  mlir_target_link_libraries(MLIRTestRewrite PUBLIC
     MLIRIR
     MLIRPass
     MLIRSupport
diff --git a/mlir/test/lib/Tools/PDLL/CMakeLists.txt b/mlir/test/lib/Tools/PDLL/CMakeLists.txt
index 408c29ea35ec3..60cf30b33e559 100644
--- a/mlir/test/lib/Tools/PDLL/CMakeLists.txt
+++ b/mlir/test/lib/Tools/PDLL/CMakeLists.txt
@@ -20,13 +20,15 @@ add_mlir_library(MLIRTestPDLL
   MLIRTestPDLLPatternsIncGen
 
   LINK_LIBS PUBLIC
+  MLIRTestDialect
+  )
+mlir_target_link_libraries(MLIRTestPDLL PUBLIC
   MLIRCastInterfaces
   MLIRIR
   MLIRPass
   MLIRPDLInterpDialect
   MLIRPDLDialect
   MLIRSupport
-  MLIRTestDialect
   MLIRTransformUtils
   )
 
diff --git a/mlir/test/lib/Transforms/CMakeLists.txt b/mlir/test/lib/Transforms/CMakeLists.txt
index 66b1faf78e2d8..b91265d20fb48 100644
--- a/mlir/test/lib/Transforms/CMakeLists.txt
+++ b/mlir/test/lib/Transforms/CMakeLists.txt
@@ -34,8 +34,8 @@ add_mlir_library(MLIRTestTransforms
 
   DEPENDS
   ${MLIRTestTransformsPDLDep}
-
-  LINK_LIBS PUBLIC
+  )
+mlir_target_link_libraries(MLIRTestTransforms PUBLIC
   MLIRAnalysis
   MLIRFuncDialect
   MLIRInferIntRangeInterface
diff --git a/mlir/test/lit.cfg.py b/mlir/test/lit.cfg.py
index 16862bd22cf97..c3bcc1e84e502 100644
--- a/mlir/test/lit.cfg.py
+++ b/mlir/test/lit.cfg.py
@@ -214,7 +214,7 @@ def find_real_python_interpreter():
 ]
 
 if config.enable_vulkan_runner:
-    tools.extend([add_runtime("vulkan-runtime-wrappers")])
+    tools.extend([add_runtime("mlir_vulkan_runtime")])
 
 if config.enable_rocm_runner:
     tools.extend([add_runtime("mlir_rocm_runtime")])
diff --git a/mlir/test/python/lib/CMakeLists.txt b/mlir/test/python/lib/CMakeLists.txt
index 198ed8211e773..9a813dace2f54 100644
--- a/mlir/test/python/lib/CMakeLists.txt
+++ b/mlir/test/python/lib/CMakeLists.txt
@@ -12,8 +12,8 @@ add_mlir_library(MLIRPythonTestDialect
 
   DEPENDS
   MLIRPythonTestIncGen
-
-  LINK_LIBS PUBLIC
+)
+mlir_target_link_libraries(MLIRPythonTestDialect PUBLIC
   MLIRInferTypeOpInterface
   MLIRIR
   MLIRSupport
diff --git a/mlir/tools/CMakeLists.txt b/mlir/tools/CMakeLists.txt
index 072e83c5d45ea..72a857b114fbf 100644
--- a/mlir/tools/CMakeLists.txt
+++ b/mlir/tools/CMakeLists.txt
@@ -7,7 +7,6 @@ add_subdirectory(mlir-reduce)
 add_subdirectory(mlir-rewrite)
 add_subdirectory(mlir-shlib)
 add_subdirectory(mlir-translate)
-add_subdirectory(mlir-vulkan-runner)
 add_subdirectory(tblgen-lsp-server)
 add_subdirectory(tblgen-to-irdl)
 
diff --git a/mlir/tools/mlir-opt/CMakeLists.txt b/mlir/tools/mlir-opt/CMakeLists.txt
index 3563d66fa9e79..68b31e563ba35 100644
--- a/mlir/tools/mlir-opt/CMakeLists.txt
+++ b/mlir/tools/mlir-opt/CMakeLists.txt
@@ -91,8 +91,8 @@ add_mlir_library(MLIRMlirOptMain
   mlir-opt.cpp
 
   EXCLUDE_FROM_LIBMLIR
-
-  LINK_LIBS PUBLIC
+  )
+mlir_target_link_libraries(MLIRMlirOptMain PUBLIC
   ${LIBS}
   ${test_libs}
   )
diff --git a/mlir/tools/mlir-vulkan-runner/CMakeLists.txt b/mlir/tools/mlir-vulkan-runner/CMakeLists.txt
deleted file mode 100644
index 26d6caacb0a7b..0000000000000
--- a/mlir/tools/mlir-vulkan-runner/CMakeLists.txt
+++ /dev/null
@@ -1,99 +0,0 @@
-set(LLVM_OPTIONAL_SOURCES
-  mlir-vulkan-runner.cpp
-  vulkan-runtime-wrappers.cpp
-  VulkanRuntime.cpp
-  VulkanRuntime.h
-  )
-
-if (MLIR_ENABLE_VULKAN_RUNNER)
-  message(STATUS "Building the Vulkan runner")
-
-  find_package(Vulkan)
-
-  # If Vulkan is not found try a path specified by VULKAN_SDK.
-  if (NOT Vulkan_FOUND)
-    if ("$ENV{VULKAN_SDK}" STREQUAL "")
-      message(FATAL_ERROR "Vulkan not found through CMake; please provide "
-                          "VULKAN_SDK path as an environment variable")
-    endif()
-
-    find_library(Vulkan_LIBRARY vulkan HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED)
-    if (Vulkan_LIBRARY)
-      set(Vulkan_FOUND ON)
-      set(Vulkan_INCLUDE_DIR "$ENV{VULKAN_SDK}/include")
-      message(STATUS "Found Vulkan: " ${Vulkan_LIBRARY})
-    endif()
-  endif()
-
-  if (NOT Vulkan_FOUND)
-    message(FATAL_ERROR "Cannot find Vulkan library")
-  endif()
-
-  add_llvm_library(vulkan-runtime-wrappers SHARED
-    vulkan-runtime-wrappers.cpp
-    VulkanRuntime.cpp
-  )
-
-  target_include_directories(vulkan-runtime-wrappers
-    PUBLIC
-    ${Vulkan_INCLUDE_DIR}
-  )
-
-  # *IMPORTANT*: This library cannot depend on LLVM libraries. Otherwise,
-  # it may cause LLVM version conflict when used together with other shared
-  # libraries depending on LLVM. Notably, Mesa, who implements Vulkan
-  # drivers on Linux, depends on the system libLLVM.so.
-  target_link_libraries(vulkan-runtime-wrappers
-    PUBLIC
-    ${Vulkan_LIBRARY}
-  )
-
-  get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS)
-  set(LIBS
-    ${conversion_libs}
-    MLIRAnalysis
-    MLIRArithDialect
-    MLIRBuiltinToLLVMIRTranslation
-    MLIRExecutionEngine
-    MLIRFuncDialect
-    MLIRGPUDialect
-    MLIRIR
-    MLIRJitRunner
-    MLIRLLVMDialect
-    MLIRLLVMCommonConversion
-    MLIRLLVMToLLVMIRTranslation
-    MLIRMemRefDialect
-    MLIRMemRefToLLVM
-    MLIRParser
-    MLIRSPIRVDialect
-    MLIRSPIRVTransforms
-    MLIRSupport
-    MLIRTargetLLVMIRExport
-    MLIRTransforms
-    MLIRTranslateLib
-    MLIRVectorDialect
-    MLIRVectorToLLVMPass
-    ${Vulkan_LIBRARY}
-  )
-
-  # Manually expand the target library, since our MLIR libraries
-  # aren't plugged into the LLVM dependency tracking. If we don't
-  # do this then we can't insert the CodeGen library after ourselves
-  llvm_expand_pseudo_components(TARGET_LIBS AllTargetsCodeGens)
-  # Prepend LLVM in front of every target, this is how the library
-  # are named with CMake
-  SET(targets_to_link)
-  FOREACH(t ${TARGET_LIBS})
-    LIST(APPEND targets_to_link "LLVM${t}")
-  ENDFOREACH(t)
-
-  add_mlir_tool(mlir-vulkan-runner
-    mlir-vulkan-runner.cpp
-
-    DEPENDS
-    vulkan-runtime-wrappers
-  )
-  llvm_update_compile_flags(mlir-vulkan-runner)
-  target_link_libraries(mlir-vulkan-runner PRIVATE ${LIBS})
-
-endif()
diff --git a/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp b/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp
deleted file mode 100644
index 090df2d9ed2a5..0000000000000
--- a/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp
+++ /dev/null
@@ -1,88 +0,0 @@
-//===- mlir-vulkan-runner.cpp - MLIR Vulkan Execution Driver --------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This is a command line utility that executes an MLIR file on the Vulkan by
-// translating MLIR GPU module to SPIR-V and host part to LLVM IR before
-// JIT-compiling and executing the latter.
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h"
-#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
-#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h"
-#include "mlir/Conversion/GPUToVulkan/ConvertGPUToVulkanPass.h"
-#include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
-#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
-#include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h"
-#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.h"
-#include "mlir/Dialect/Arith/IR/Arith.h"
-#include "mlir/Dialect/Func/IR/FuncOps.h"
-#include "mlir/Dialect/GPU/IR/GPUDialect.h"
-#include "mlir/Dialect/GPU/Transforms/Passes.h"
-#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
-#include "mlir/Dialect/LLVMIR/Transforms/RequestCWrappers.h"
-#include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/Dialect/MemRef/Transforms/Passes.h"
-#include "mlir/Dialect/SCF/IR/SCF.h"
-#include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
-#include "mlir/Dialect/Vector/IR/VectorOps.h"
-#include "mlir/ExecutionEngine/JitRunner.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Pass/PassManager.h"
-#include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h"
-#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
-#include "llvm/Support/InitLLVM.h"
-#include "llvm/Support/TargetSelect.h"
-
-using namespace mlir;
-
-static LogicalResult runMLIRPasses(Operation *op, JitRunnerOptions &) {
-  auto module = dyn_cast<ModuleOp>(op);
-  if (!module)
-    return op->emitOpError("expected a 'builtin.module' op");
-  PassManager passManager(module.getContext());
-  if (failed(applyPassManagerCLOptions(passManager)))
-    return failure();
-
-  passManager.addPass(createConvertGpuLaunchFuncToVulkanLaunchFuncPass());
-  passManager.addPass(createFinalizeMemRefToLLVMConversionPass());
-  passManager.addPass(createConvertVectorToLLVMPass());
-  passManager.nest<func::FuncOp>().addPass(LLVM::createRequestCWrappersPass());
-  ConvertFuncToLLVMPassOptions funcToLLVMOptions{};
-  funcToLLVMOptions.indexBitwidth =
-      DataLayout(module).getTypeSizeInBits(IndexType::get(module.getContext()));
-  passManager.addPass(createConvertFuncToLLVMPass(funcToLLVMOptions));
-  passManager.addPass(createArithToLLVMConversionPass());
-  passManager.addPass(createConvertControlFlowToLLVMPass());
-  passManager.addPass(createReconcileUnrealizedCastsPass());
-  passManager.addPass(createConvertVulkanLaunchFuncToVulkanCallsPass());
-
-  return passManager.run(module);
-}
-
-int main(int argc, char **argv) {
-  llvm::llvm_shutdown_obj x;
-  registerPassManagerCLOptions();
-
-  llvm::InitLLVM y(argc, argv);
-  llvm::InitializeNativeTarget();
-  llvm::InitializeNativeTargetAsmPrinter();
-
-  mlir::JitRunnerConfig jitRunnerConfig;
-  jitRunnerConfig.mlirTransformer = runMLIRPasses;
-
-  mlir::DialectRegistry registry;
-  registry.insert<mlir::arith::ArithDialect, mlir::LLVM::LLVMDialect,
-                  mlir::gpu::GPUDialect, mlir::spirv::SPIRVDialect,
-                  mlir::scf::SCFDialect, mlir::func::FuncDialect,
-                  mlir::memref::MemRefDialect, mlir::vector::VectorDialect>();
-  mlir::registerBuiltinDialectTranslation(registry);
-  mlir::registerLLVMDialectTranslation(registry);
-
-  return mlir::JitRunnerMain(argc, argv, registry, jitRunnerConfig);
-}
diff --git a/mlir/unittests/ExecutionEngine/CMakeLists.txt b/mlir/unittests/ExecutionEngine/CMakeLists.txt
index 8aee46f261b56..4ef69a8ea042d 100644
--- a/mlir/unittests/ExecutionEngine/CMakeLists.txt
+++ b/mlir/unittests/ExecutionEngine/CMakeLists.txt
@@ -1,3 +1,10 @@
+set(LLVM_LINK_COMPONENTS
+  nativecodegen
+  native
+  orcjit
+  support
+)
+
 add_mlir_unittest(MLIRExecutionEngineTests
   DynamicMemRef.cpp
   StridedMemRef.cpp
diff --git a/offload/DeviceRTL/include/State.h b/offload/DeviceRTL/include/State.h
index 565235cd48a91..c487ff29680fa 100644
--- a/offload/DeviceRTL/include/State.h
+++ b/offload/DeviceRTL/include/State.h
@@ -158,63 +158,61 @@ struct DateEnvironmentRAII {
 /// TODO
 void resetStateForThread(uint32_t TId);
 
-inline uint32_t &lookupForModify32Impl(uint32_t state::ICVStateTy::*Var,
-                                       IdentTy *Ident, bool ForceTeamState) {
-  if (OMP_LIKELY(ForceTeamState || !config::mayUseThreadStates() ||
-                 !TeamState.HasThreadState))
-    return TeamState.ICVState.*Var;
-  uint32_t TId = mapping::getThreadIdInBlock();
-  if (OMP_UNLIKELY(!ThreadStates[TId])) {
-    ThreadStates[TId] = reinterpret_cast<ThreadStateTy *>(memory::allocGlobal(
-        sizeof(ThreadStateTy), "ICV modification outside data environment"));
-    ASSERT(ThreadStates[TId] != nullptr, "Nullptr returned by malloc!");
-    TeamState.HasThreadState = true;
-    ThreadStates[TId]->init();
+// FIXME: https://github.com/llvm/llvm-project/issues/123241.
+#define lookupForModify32Impl(Member, Ident, ForceTeamState)                   \
+  {                                                                            \
+    if (OMP_LIKELY(ForceTeamState || !config::mayUseThreadStates() ||          \
+                   !TeamState.HasThreadState))                                 \
+      return TeamState.ICVState.Member;                                        \
+    uint32_t TId = mapping::getThreadIdInBlock();                              \
+    if (OMP_UNLIKELY(!ThreadStates[TId])) {                                    \
+      ThreadStates[TId] = reinterpret_cast<ThreadStateTy *>(                   \
+          memory::allocGlobal(sizeof(ThreadStateTy),                           \
+                              "ICV modification outside data environment"));   \
+      ASSERT(ThreadStates[TId] != nullptr, "Nullptr returned by malloc!");     \
+      TeamState.HasThreadState = true;                                         \
+      ThreadStates[TId]->init();                                               \
+    }                                                                          \
+    return ThreadStates[TId]->ICVState.Member;                                 \
   }
-  return ThreadStates[TId]->ICVState.*Var;
-}
 
-inline uint32_t &lookupImpl(uint32_t state::ICVStateTy::*Var,
-                            bool ForceTeamState) {
-  auto TId = mapping::getThreadIdInBlock();
-  if (OMP_UNLIKELY(!ForceTeamState && config::mayUseThreadStates() &&
-                   TeamState.HasThreadState && ThreadStates[TId]))
-    return ThreadStates[TId]->ICVState.*Var;
-  return TeamState.ICVState.*Var;
-}
+// FIXME: https://github.com/llvm/llvm-project/issues/123241.
+#define lookupImpl(Member, ForceTeamState)                                     \
+  {                                                                            \
+    auto TId = mapping::getThreadIdInBlock();                                  \
+    if (OMP_UNLIKELY(!ForceTeamState && config::mayUseThreadStates() &&        \
+                     TeamState.HasThreadState && ThreadStates[TId]))           \
+      return ThreadStates[TId]->ICVState.Member;                               \
+    return TeamState.ICVState.Member;                                          \
+  }
 
 [[gnu::always_inline, gnu::flatten]] inline uint32_t &
 lookup32(ValueKind Kind, bool IsReadonly, IdentTy *Ident, bool ForceTeamState) {
   switch (Kind) {
   case state::VK_NThreads:
     if (IsReadonly)
-      return lookupImpl(&ICVStateTy::NThreadsVar, ForceTeamState);
-    return lookupForModify32Impl(&ICVStateTy::NThreadsVar, Ident,
-                                 ForceTeamState);
+      lookupImpl(NThreadsVar, ForceTeamState);
+    lookupForModify32Impl(NThreadsVar, Ident, ForceTeamState);
   case state::VK_Level:
     if (IsReadonly)
-      return lookupImpl(&ICVStateTy::LevelVar, ForceTeamState);
-    return lookupForModify32Impl(&ICVStateTy::LevelVar, Ident, ForceTeamState);
+      lookupImpl(LevelVar, ForceTeamState);
+    lookupForModify32Impl(LevelVar, Ident, ForceTeamState);
   case state::VK_ActiveLevel:
     if (IsReadonly)
-      return lookupImpl(&ICVStateTy::ActiveLevelVar, ForceTeamState);
-    return lookupForModify32Impl(&ICVStateTy::ActiveLevelVar, Ident,
-                                 ForceTeamState);
+      lookupImpl(ActiveLevelVar, ForceTeamState);
+    lookupForModify32Impl(ActiveLevelVar, Ident, ForceTeamState);
   case state::VK_MaxActiveLevels:
     if (IsReadonly)
-      return lookupImpl(&ICVStateTy::MaxActiveLevelsVar, ForceTeamState);
-    return lookupForModify32Impl(&ICVStateTy::MaxActiveLevelsVar, Ident,
-                                 ForceTeamState);
+      lookupImpl(MaxActiveLevelsVar, ForceTeamState);
+    lookupForModify32Impl(MaxActiveLevelsVar, Ident, ForceTeamState);
   case state::VK_RunSched:
     if (IsReadonly)
-      return lookupImpl(&ICVStateTy::RunSchedVar, ForceTeamState);
-    return lookupForModify32Impl(&ICVStateTy::RunSchedVar, Ident,
-                                 ForceTeamState);
+      lookupImpl(RunSchedVar, ForceTeamState);
+    lookupForModify32Impl(RunSchedVar, Ident, ForceTeamState);
   case state::VK_RunSchedChunk:
     if (IsReadonly)
-      return lookupImpl(&ICVStateTy::RunSchedChunkVar, ForceTeamState);
-    return lookupForModify32Impl(&ICVStateTy::RunSchedChunkVar, Ident,
-                                 ForceTeamState);
+      lookupImpl(RunSchedChunkVar, ForceTeamState);
+    lookupForModify32Impl(RunSchedChunkVar, Ident, ForceTeamState);
   case state::VK_ParallelTeamSize:
     return TeamState.ParallelTeamSize;
   case state::VK_HasThreadState:
diff --git a/offload/docs/declare_target_indirect.md b/offload/docs/declare_target_indirect.md
index 443a5ab1d4b96..bd66dcd98154f 100644
--- a/offload/docs/declare_target_indirect.md
+++ b/offload/docs/declare_target_indirect.md
@@ -25,7 +25,7 @@ The offload entries table that is created for the host and for each of the devic
 
 Compiler will also produce an entry for each procedure listed in **indirect** clause of **declare target** construct:
 ```C++
-struct __tgt_offload_entry {
+struct llvm::offloading::EntryTy {
   void *addr;       // Pointer to the function
   char *name;       // Name of the function
   size_t size;      // 0 for function
@@ -82,7 +82,7 @@ struct __omp_offloading_fptr_map_ty {
 };
 ```
 
-Where `host_ptr` is `__tgt_offload_entry::addr` in a **host** offload entry, and `tgt_ptr` is `__tgt_offload_entry::addr` in the corresponding **device** offload entry (which may be found using the populated `Device.HostDataToTargetMap`).
+Where `host_ptr` is `llvm::offloading::EntryTy::addr` in a **host** offload entry, and `tgt_ptr` is `llvm::offloading::EntryTy::addr` in the corresponding **device** offload entry (which may be found using the populated `Device.HostDataToTargetMap`).
 
 When all `__omp_offloading_function_ptr_map_ty` entries are collected in a single host array, `libomptarget` sorts the table by `host_ptr` values and passes it to the device plugin for registration, if plugin supports optional `__tgt_rtl_set_function_ptr_map` API.
 
diff --git a/offload/include/OffloadEntry.h b/offload/include/OffloadEntry.h
index da1de8123be97..551ee698759e2 100644
--- a/offload/include/OffloadEntry.h
+++ b/offload/include/OffloadEntry.h
@@ -22,24 +22,25 @@ class DeviceImageTy;
 
 class OffloadEntryTy {
   DeviceImageTy &DeviceImage;
-  __tgt_offload_entry &OffloadEntry;
+  llvm::offloading::EntryTy &OffloadEntry;
 
 public:
-  OffloadEntryTy(DeviceImageTy &DeviceImage, __tgt_offload_entry &OffloadEntry)
+  OffloadEntryTy(DeviceImageTy &DeviceImage,
+                 llvm::offloading::EntryTy &OffloadEntry)
       : DeviceImage(DeviceImage), OffloadEntry(OffloadEntry) {}
 
   bool isGlobal() const { return getSize() != 0; }
-  size_t getSize() const { return OffloadEntry.size; }
+  size_t getSize() const { return OffloadEntry.Size; }
 
-  void *getAddress() const { return OffloadEntry.addr; }
-  llvm::StringRef getName() const { return OffloadEntry.name; }
-  const char *getNameAsCStr() const { return OffloadEntry.name; }
+  void *getAddress() const { return OffloadEntry.Address; }
+  llvm::StringRef getName() const { return OffloadEntry.SymbolName; }
+  const char *getNameAsCStr() const { return OffloadEntry.SymbolName; }
   __tgt_bin_desc *getBinaryDescription() const;
 
   bool isLink() const { return hasFlags(OMP_DECLARE_TARGET_LINK); }
 
   bool hasFlags(OpenMPOffloadingDeclareTargetFlags Flags) const {
-    return Flags & OffloadEntry.flags;
+    return Flags & OffloadEntry.Flags;
   }
 };
 
diff --git a/offload/include/PluginManager.h b/offload/include/PluginManager.h
index 94b6d1090b5a8..324dcac7787ea 100644
--- a/offload/include/PluginManager.h
+++ b/offload/include/PluginManager.h
@@ -81,7 +81,8 @@ struct PluginManager {
   HostEntriesBeginToTransTableTy HostEntriesBeginToTransTable;
   std::mutex TrlTblMtx; ///< For Translation Table
   /// Host offload entries in order of image registration
-  llvm::SmallVector<__tgt_offload_entry *> HostEntriesBeginRegistrationOrder;
+  llvm::SmallVector<llvm::offloading::EntryTy *>
+      HostEntriesBeginRegistrationOrder;
 
   /// Map from ptrs on the host to an entry in the Translation Table
   HostPtrToTableMapTy HostPtrToTableMap;
diff --git a/offload/include/Shared/APITypes.h b/offload/include/Shared/APITypes.h
index 4c1f7712249a3..978b53d5d69b9 100644
--- a/offload/include/Shared/APITypes.h
+++ b/offload/include/Shared/APITypes.h
@@ -17,28 +17,20 @@
 #include "Environment.h"
 
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Frontend/Offloading/Utility.h"
 
 #include <cstddef>
 #include <cstdint>
 
 extern "C" {
 
-/// This struct is a record of an entry point or global. For a function
-/// entry point the size is expected to be zero
-struct __tgt_offload_entry {
-  void *addr;       // Pointer to the offload entry info (function or global)
-  char *name;       // Name of the function or global
-  size_t size;      // Size of the entry info (0 if it is a function)
-  int32_t flags;    // Flags associated with the entry, e.g. 'link'.
-  int32_t data;     // Extra data associated with the entry.
-};
-
 /// This struct is a record of the device image information
 struct __tgt_device_image {
-  void *ImageStart;                  // Pointer to the target code start
-  void *ImageEnd;                    // Pointer to the target code end
-  __tgt_offload_entry *EntriesBegin; // Begin of table with all target entries
-  __tgt_offload_entry *EntriesEnd;   // End of table (non inclusive)
+  void *ImageStart; // Pointer to the target code start
+  void *ImageEnd;   // Pointer to the target code end
+  llvm::offloading::EntryTy
+      *EntriesBegin; // Begin of table with all target entries
+  llvm::offloading::EntryTy *EntriesEnd; // End of table (non inclusive)
 };
 
 struct __tgt_device_info {
@@ -51,14 +43,16 @@ struct __tgt_device_info {
 struct __tgt_bin_desc {
   int32_t NumDeviceImages;          // Number of device types supported
   __tgt_device_image *DeviceImages; // Array of device images (1 per dev. type)
-  __tgt_offload_entry *HostEntriesBegin; // Begin of table with all host entries
-  __tgt_offload_entry *HostEntriesEnd;   // End of table (non inclusive)
+  llvm::offloading::EntryTy
+      *HostEntriesBegin; // Begin of table with all host entries
+  llvm::offloading::EntryTy *HostEntriesEnd; // End of table (non inclusive)
 };
 
 /// This struct contains the offload entries identified by the target runtime
 struct __tgt_target_table {
-  __tgt_offload_entry *EntriesBegin; // Begin of the table with all the entries
-  __tgt_offload_entry
+  llvm::offloading::EntryTy
+      *EntriesBegin; // Begin of the table with all the entries
+  llvm::offloading::EntryTy
       *EntriesEnd; // End of the table with all the entries (non inclusive)
 };
 
@@ -107,9 +101,9 @@ struct KernelArgsTy {
   } Flags = {0, 0, 0};
   // The number of teams (for x,y,z dimension).
   uint32_t NumTeams[3] = {0, 0, 0};
-   // The number of threads (for x,y,z dimension).
+  // The number of threads (for x,y,z dimension).
   uint32_t ThreadLimit[3] = {0, 0, 0};
-  uint32_t DynCGroupMem = 0;     // Amount of dynamic cgroup memory requested.
+  uint32_t DynCGroupMem = 0; // Amount of dynamic cgroup memory requested.
 };
 static_assert(sizeof(KernelArgsTy().Flags) == sizeof(uint64_t),
               "Invalid struct size");
diff --git a/offload/include/rtl.h b/offload/include/rtl.h
index 5e198bdad4364..38f1dd24011e0 100644
--- a/offload/include/rtl.h
+++ b/offload/include/rtl.h
@@ -22,7 +22,7 @@
 
 /// Map between the host entry begin and the translation table. Each
 /// registered library gets one TranslationTable. Use the map from
-/// __tgt_offload_entry so that we may quickly determine whether we
+/// llvm::offloading::EntryTy so that we may quickly determine whether we
 /// are trying to (re)register an existing lib or really have a new one.
 struct TranslationTable {
   __tgt_target_table HostTable;
@@ -33,14 +33,14 @@ struct TranslationTable {
       TargetsImages; // One image per device ID.
 
   // Arrays of entries active on the device.
-  llvm::SmallVector<llvm::SmallVector<__tgt_offload_entry>>
+  llvm::SmallVector<llvm::SmallVector<llvm::offloading::EntryTy>>
       TargetsEntries; // One table per device ID.
 
   // Table of entry points or NULL if it was not already computed.
   llvm::SmallVector<__tgt_target_table *>
       TargetsTable; // One table per device ID.
 };
-typedef std::map<__tgt_offload_entry *, TranslationTable>
+typedef std::map<llvm::offloading::EntryTy *, TranslationTable>
     HostEntriesBeginToTransTableTy;
 
 /// Map between the host ptr and a table index
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index bd58d1d6e0d96..a164bfb51d026 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -376,24 +376,24 @@ setupIndirectCallTable(GenericPluginTy &Plugin, GenericDeviceTy &Device,
                        DeviceImageTy &Image) {
   GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
 
-  llvm::ArrayRef<__tgt_offload_entry> Entries(Image.getTgtImage()->EntriesBegin,
-                                              Image.getTgtImage()->EntriesEnd);
+  llvm::ArrayRef<llvm::offloading::EntryTy> Entries(
+      Image.getTgtImage()->EntriesBegin, Image.getTgtImage()->EntriesEnd);
   llvm::SmallVector<std::pair<void *, void *>> IndirectCallTable;
   for (const auto &Entry : Entries) {
-    if (Entry.size == 0 || !(Entry.flags & OMP_DECLARE_TARGET_INDIRECT))
+    if (Entry.Size == 0 || !(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT))
       continue;
 
-    assert(Entry.size == sizeof(void *) && "Global not a function pointer?");
+    assert(Entry.Size == sizeof(void *) && "Global not a function pointer?");
     auto &[HstPtr, DevPtr] = IndirectCallTable.emplace_back();
 
-    GlobalTy DeviceGlobal(Entry.name, Entry.size);
+    GlobalTy DeviceGlobal(Entry.SymbolName, Entry.Size);
     if (auto Err =
             Handler.getGlobalMetadataFromDevice(Device, Image, DeviceGlobal))
       return std::move(Err);
 
-    HstPtr = Entry.addr;
+    HstPtr = Entry.Address;
     if (auto Err = Device.dataRetrieve(&DevPtr, DeviceGlobal.getPtr(),
-                                       Entry.size, nullptr))
+                                       Entry.Size, nullptr))
       return std::move(Err);
   }
 
diff --git a/offload/src/PluginManager.cpp b/offload/src/PluginManager.cpp
index 315b953f9b31a..96fa0bb170489 100644
--- a/offload/src/PluginManager.cpp
+++ b/offload/src/PluginManager.cpp
@@ -128,10 +128,10 @@ void PluginManager::registerLib(__tgt_bin_desc *Desc) {
   PM->RTLsMtx.lock();
 
   // Add in all the OpenMP requirements associated with this binary.
-  for (__tgt_offload_entry &Entry :
+  for (llvm::offloading::EntryTy &Entry :
        llvm::make_range(Desc->HostEntriesBegin, Desc->HostEntriesEnd))
-    if (Entry.flags == OMP_REGISTER_REQUIRES)
-      PM->addRequirements(Entry.data);
+    if (Entry.Flags == OMP_REGISTER_REQUIRES)
+      PM->addRequirements(Entry.Data);
 
   // Extract the exectuable image and extra information if availible.
   for (int32_t i = 0; i < Desc->NumDeviceImages; ++i)
@@ -268,9 +268,9 @@ void PluginManager::unregisterLib(__tgt_bin_desc *Desc) {
 
   // Remove entries from PM->HostPtrToTableMap
   PM->TblMapMtx.lock();
-  for (__tgt_offload_entry *Cur = Desc->HostEntriesBegin;
+  for (llvm::offloading::EntryTy *Cur = Desc->HostEntriesBegin;
        Cur < Desc->HostEntriesEnd; ++Cur) {
-    PM->HostPtrToTableMap.erase(Cur->addr);
+    PM->HostPtrToTableMap.erase(Cur->Address);
   }
 
   // Remove translation table for this descriptor.
@@ -336,35 +336,36 @@ static int loadImagesOntoDevice(DeviceTy &Device) {
       }
 
       // 3) Create the translation table.
-      llvm::SmallVector<__tgt_offload_entry> &DeviceEntries =
+      llvm::SmallVector<llvm::offloading::EntryTy> &DeviceEntries =
           TransTable->TargetsEntries[DeviceId];
-      for (__tgt_offload_entry &Entry :
+      for (llvm::offloading::EntryTy &Entry :
            llvm::make_range(Img->EntriesBegin, Img->EntriesEnd)) {
         __tgt_device_binary &Binary = *BinaryOrErr;
 
-        __tgt_offload_entry DeviceEntry = Entry;
-        if (Entry.size) {
-          if (Device.RTL->get_global(Binary, Entry.size, Entry.name,
-                                     &DeviceEntry.addr) != OFFLOAD_SUCCESS)
-            REPORT("Failed to load symbol %s\n", Entry.name);
+        llvm::offloading::EntryTy DeviceEntry = Entry;
+        if (Entry.Size) {
+          if (Device.RTL->get_global(Binary, Entry.Size, Entry.SymbolName,
+                                     &DeviceEntry.Address) != OFFLOAD_SUCCESS)
+            REPORT("Failed to load symbol %s\n", Entry.SymbolName);
 
           // If unified memory is active, the corresponding global is a device
           // reference to the host global. We need to initialize the pointer on
           // the device to point to the memory on the host.
           if ((PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
               (PM->getRequirements() & OMPX_REQ_AUTO_ZERO_COPY)) {
-            if (Device.RTL->data_submit(DeviceId, DeviceEntry.addr, Entry.addr,
-                                        Entry.size) != OFFLOAD_SUCCESS)
-              REPORT("Failed to write symbol for USM %s\n", Entry.name);
+            if (Device.RTL->data_submit(DeviceId, DeviceEntry.Address,
+                                        Entry.Address,
+                                        Entry.Size) != OFFLOAD_SUCCESS)
+              REPORT("Failed to write symbol for USM %s\n", Entry.SymbolName);
           }
-        } else if (Entry.addr) {
-          if (Device.RTL->get_function(Binary, Entry.name, &DeviceEntry.addr) !=
-              OFFLOAD_SUCCESS)
-            REPORT("Failed to load kernel %s\n", Entry.name);
+        } else if (Entry.Address) {
+          if (Device.RTL->get_function(Binary, Entry.SymbolName,
+                                       &DeviceEntry.Address) != OFFLOAD_SUCCESS)
+            REPORT("Failed to load kernel %s\n", Entry.SymbolName);
         }
         DP("Entry point " DPxMOD " maps to%s %s (" DPxMOD ")\n",
-           DPxPTR(Entry.addr), (Entry.size) ? " global" : "", Entry.name,
-           DPxPTR(DeviceEntry.addr));
+           DPxPTR(Entry.Address), (Entry.Size) ? " global" : "",
+           Entry.SymbolName, DPxPTR(DeviceEntry.Address));
 
         DeviceEntries.emplace_back(DeviceEntry);
       }
@@ -396,30 +397,31 @@ static int loadImagesOntoDevice(DeviceTy &Device) {
           Device.getMappingInfo().HostDataToTargetMap.getExclusiveAccessor();
 
       __tgt_target_table *HostTable = &TransTable->HostTable;
-      for (__tgt_offload_entry *CurrDeviceEntry = TargetTable->EntriesBegin,
-                               *CurrHostEntry = HostTable->EntriesBegin,
-                               *EntryDeviceEnd = TargetTable->EntriesEnd;
+      for (llvm::offloading::EntryTy *
+               CurrDeviceEntry = TargetTable->EntriesBegin,
+              *CurrHostEntry = HostTable->EntriesBegin,
+              *EntryDeviceEnd = TargetTable->EntriesEnd;
            CurrDeviceEntry != EntryDeviceEnd;
            CurrDeviceEntry++, CurrHostEntry++) {
-        if (CurrDeviceEntry->size == 0)
+        if (CurrDeviceEntry->Size == 0)
           continue;
 
-        assert(CurrDeviceEntry->size == CurrHostEntry->size &&
+        assert(CurrDeviceEntry->Size == CurrHostEntry->Size &&
                "data size mismatch");
 
         // Fortran may use multiple weak declarations for the same symbol,
         // therefore we must allow for multiple weak symbols to be loaded from
         // the fat binary. Treat these mappings as any other "regular"
         // mapping. Add entry to map.
-        if (Device.getMappingInfo().getTgtPtrBegin(HDTTMap, CurrHostEntry->addr,
-                                                   CurrHostEntry->size))
+        if (Device.getMappingInfo().getTgtPtrBegin(
+                HDTTMap, CurrHostEntry->Address, CurrHostEntry->Size))
           continue;
 
-        void *CurrDeviceEntryAddr = CurrDeviceEntry->addr;
+        void *CurrDeviceEntryAddr = CurrDeviceEntry->Address;
 
         // For indirect mapping, follow the indirection and map the actual
         // target.
-        if (CurrDeviceEntry->flags & OMP_DECLARE_TARGET_INDIRECT) {
+        if (CurrDeviceEntry->Flags & OMP_DECLARE_TARGET_INDIRECT) {
           AsyncInfoTy AsyncInfo(Device);
           void *DevPtr;
           Device.retrieveData(&DevPtr, CurrDeviceEntryAddr, sizeof(void *),
@@ -431,19 +433,21 @@ static int loadImagesOntoDevice(DeviceTy &Device) {
 
         DP("Add mapping from host " DPxMOD " to device " DPxMOD " with size %zu"
            ", name \"%s\"\n",
-           DPxPTR(CurrHostEntry->addr), DPxPTR(CurrDeviceEntry->addr),
-           CurrDeviceEntry->size, CurrDeviceEntry->name);
+           DPxPTR(CurrHostEntry->Address), DPxPTR(CurrDeviceEntry->Address),
+           CurrDeviceEntry->Size, CurrDeviceEntry->SymbolName);
         HDTTMap->emplace(new HostDataToTargetTy(
-            (uintptr_t)CurrHostEntry->addr /*HstPtrBase*/,
-            (uintptr_t)CurrHostEntry->addr /*HstPtrBegin*/,
-            (uintptr_t)CurrHostEntry->addr + CurrHostEntry->size /*HstPtrEnd*/,
+            (uintptr_t)CurrHostEntry->Address /*HstPtrBase*/,
+            (uintptr_t)CurrHostEntry->Address /*HstPtrBegin*/,
+            (uintptr_t)CurrHostEntry->Address +
+                CurrHostEntry->Size /*HstPtrEnd*/,
             (uintptr_t)CurrDeviceEntryAddr /*TgtAllocBegin*/,
             (uintptr_t)CurrDeviceEntryAddr /*TgtPtrBegin*/,
-            false /*UseHoldRefCount*/, CurrHostEntry->name,
+            false /*UseHoldRefCount*/, CurrHostEntry->SymbolName,
             true /*IsRefCountINF*/));
 
         // Notify about the new mapping.
-        if (Device.notifyDataMapped(CurrHostEntry->addr, CurrHostEntry->size))
+        if (Device.notifyDataMapped(CurrHostEntry->Address,
+                                    CurrHostEntry->Size))
           return OFFLOAD_FAIL;
       }
     }
diff --git a/offload/src/omptarget.cpp b/offload/src/omptarget.cpp
index 1a7af5649b9e2..89fa63347babe 100644
--- a/offload/src/omptarget.cpp
+++ b/offload/src/omptarget.cpp
@@ -977,9 +977,9 @@ TableMap *getTableMap(void *HostPtr) {
     TranslationTable *TransTable = &Itr->second;
     // iterate over all the host table entries to see if we can locate the
     // host_ptr.
-    __tgt_offload_entry *Cur = TransTable->HostTable.EntriesBegin;
+    llvm::offloading::EntryTy *Cur = TransTable->HostTable.EntriesBegin;
     for (uint32_t I = 0; Cur < TransTable->HostTable.EntriesEnd; ++Cur, ++I) {
-      if (Cur->addr != HostPtr)
+      if (Cur->Address != HostPtr)
         continue;
       // we got a match, now fill the HostPtrToTableMap so that we
       // may avoid this search next time.
@@ -1437,9 +1437,10 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
   }
 
   // Launch device execution.
-  void *TgtEntryPtr = TargetTable->EntriesBegin[TM->Index].addr;
+  void *TgtEntryPtr = TargetTable->EntriesBegin[TM->Index].Address;
   DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n",
-     TargetTable->EntriesBegin[TM->Index].name, DPxPTR(TgtEntryPtr), TM->Index);
+     TargetTable->EntriesBegin[TM->Index].SymbolName, DPxPTR(TgtEntryPtr),
+     TM->Index);
 
   {
     assert(KernelArgs.NumArgs == TgtArgs.size() && "Argument count mismatch!");
@@ -1525,9 +1526,10 @@ int target_replay(ident_t *Loc, DeviceTy &Device, void *HostPtr,
 
   // Retrieve the target kernel pointer, allocate and store the recorded device
   // memory data, and launch device execution.
-  void *TgtEntryPtr = TargetTable->EntriesBegin[TM->Index].addr;
+  void *TgtEntryPtr = TargetTable->EntriesBegin[TM->Index].Address;
   DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n",
-     TargetTable->EntriesBegin[TM->Index].name, DPxPTR(TgtEntryPtr), TM->Index);
+     TargetTable->EntriesBegin[TM->Index].SymbolName, DPxPTR(TgtEntryPtr),
+     TM->Index);
 
   void *TgtPtr = Device.allocData(DeviceMemorySize, /*HstPtr=*/nullptr,
                                   TARGET_ALLOC_DEFAULT);
diff --git a/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp b/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp
index 1e9a6a84d8058..ff77154033239 100644
--- a/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp
+++ b/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp
@@ -13,9 +13,11 @@
 
 #include "omptarget.h"
 
+#include "llvm/Frontend/Offloading/Utility.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/JSON.h"
 #include "llvm/Support/MemoryBuffer.h"
+
 #include <cstdint>
 #include <cstdlib>
 
@@ -91,11 +93,11 @@ int main(int argc, char **argv) {
   void *BAllocStart = reinterpret_cast<void *>(
       JsonKernelInfo->getAsObject()->getInteger("BumpAllocVAStart").value());
 
-  __tgt_offload_entry KernelEntry = {nullptr, nullptr, 0, 0, 0};
+  llvm::offloading::EntryTy KernelEntry = {nullptr, nullptr, 0, 0, 0};
   std::string KernelEntryName = KernelFunc.value().str();
-  KernelEntry.name = const_cast<char *>(KernelEntryName.c_str());
+  KernelEntry.SymbolName = const_cast<char *>(KernelEntryName.c_str());
   // Anything non-zero works to uniquely identify the kernel.
-  KernelEntry.addr = (void *)0x1;
+  KernelEntry.Address = (void *)0x1;
 
   ErrorOr<std::unique_ptr<MemoryBuffer>> ImageMB =
       MemoryBuffer::getFile(KernelEntryName + ".image", /*isText=*/false,
@@ -164,7 +166,7 @@ int main(int argc, char **argv) {
   }
 
   __tgt_target_kernel_replay(
-      /*Loc=*/nullptr, DeviceId, KernelEntry.addr, (char *)recored_data,
+      /*Loc=*/nullptr, DeviceId, KernelEntry.Address, (char *)recored_data,
       DeviceMemoryMB.get()->getBufferSize(), TgtArgs.data(),
       TgtArgOffsets.data(), NumArgs.value(), NumTeams, NumThreads,
       LoopTripCount.value());
diff --git a/polly/include/polly/CodeGen/IRBuilder.h b/polly/include/polly/CodeGen/IRBuilder.h
index ffca887fbc09a..6641ac9a0c068 100644
--- a/polly/include/polly/CodeGen/IRBuilder.h
+++ b/polly/include/polly/CodeGen/IRBuilder.h
@@ -58,9 +58,12 @@ class ScopAnnotator {
   /// Annotate the new instruction @p I for all parallel loops.
   void annotate(llvm::Instruction *I);
 
-  /// Annotate the loop latch @p B wrt. @p L.
-  void annotateLoopLatch(llvm::BranchInst *B, llvm::Loop *L, bool IsParallel,
-                         bool IsLoopVectorizerDisabled) const;
+  /// Annotate the loop latch @p B.
+  /// @p EnableVectorizeMetadata is optional; when no value is passed, no
+  /// vectorize metadata is annotated.
+  void annotateLoopLatch(
+      llvm::BranchInst *B, bool IsParallel,
+      std::optional<bool> EnableVectorizeMetadata = std::nullopt) const;
 
   /// Add alternative alias based pointers
   ///
diff --git a/polly/lib/CodeGen/IRBuilder.cpp b/polly/lib/CodeGen/IRBuilder.cpp
index 2285c746912f4..782b4b77e4ee4 100644
--- a/polly/lib/CodeGen/IRBuilder.cpp
+++ b/polly/lib/CodeGen/IRBuilder.cpp
@@ -128,8 +128,26 @@ void ScopAnnotator::popLoop(bool IsParallel) {
   LoopAttrEnv.pop_back();
 }
 
-void ScopAnnotator::annotateLoopLatch(BranchInst *B, Loop *L, bool IsParallel,
-                                      bool IsLoopVectorizerDisabled) const {
+/// Append "llvm.loop.vectorize.enable" = @p EnableLoopVectorizer to @p Args.
+static void addVectorizeMetadata(LLVMContext &Ctx,
+                                 SmallVector<Metadata *, 3> *Args,
+                                 bool EnableLoopVectorizer) {
+  MDString *PropName = MDString::get(Ctx, "llvm.loop.vectorize.enable");
+  ConstantInt *Value = ConstantInt::getBool(Ctx, EnableLoopVectorizer);
+  Args->push_back(MDNode::get(Ctx, {PropName, ValueAsMetadata::get(Value)}));
+}
+
+/// Append "llvm.loop.parallel_accesses" for ParallelLoops.back() to @p Args.
+static void addParallelMetadata(
+    LLVMContext &Ctx, SmallVector<Metadata *, 3> *Args,
+    const llvm::SmallVector<llvm::MDNode *, 8> &ParallelLoops) {
+  MDString *PropName = MDString::get(Ctx, "llvm.loop.parallel_accesses");
+  Args->push_back(MDNode::get(Ctx, {PropName, ParallelLoops.back()}));
+}
+
+void ScopAnnotator::annotateLoopLatch(
+    BranchInst *B, bool IsParallel,
+    std::optional<bool> EnableVectorizeMetadata) const {
   LLVMContext &Ctx = SE->getContext();
   SmallVector<Metadata *, 3> Args;
 
@@ -145,19 +163,10 @@ void ScopAnnotator::annotateLoopLatch(BranchInst *B, Loop *L, bool IsParallel,
     if (MData)
       llvm::append_range(Args, drop_begin(MData->operands(), 1));
   }
-
-  if (IsLoopVectorizerDisabled) {
-    MDString *PropName = MDString::get(Ctx, "llvm.loop.vectorize.enable");
-    ConstantInt *FalseValue = ConstantInt::get(Type::getInt1Ty(Ctx), 0);
-    ValueAsMetadata *PropValue = ValueAsMetadata::get(FalseValue);
-    Args.push_back(MDNode::get(Ctx, {PropName, PropValue}));
-  }
-
-  if (IsParallel) {
-    MDString *PropName = MDString::get(Ctx, "llvm.loop.parallel_accesses");
-    MDNode *AccGroup = ParallelLoops.back();
-    Args.push_back(MDNode::get(Ctx, {PropName, AccGroup}));
-  }
+  if (IsParallel)
+    addParallelMetadata(Ctx, &Args, ParallelLoops);
+  if (EnableVectorizeMetadata.has_value())
+    addVectorizeMetadata(Ctx, &Args, *EnableVectorizeMetadata);
 
   // No metadata to annotate.
   if (!MData && Args.size() <= 1)
diff --git a/polly/lib/CodeGen/LoopGenerators.cpp b/polly/lib/CodeGen/LoopGenerators.cpp
index b4f8bb8948c28..5f772170d9628 100644
--- a/polly/lib/CodeGen/LoopGenerators.cpp
+++ b/polly/lib/CodeGen/LoopGenerators.cpp
@@ -35,6 +35,11 @@ static cl::opt<int, true>
                      cl::Hidden, cl::location(polly::PollyNumThreads),
                      cl::init(0), cl::cat(PollyCategory));
 
+cl::opt<bool> PollyVectorizeMetadata(
+    "polly-annotate-metadata-vectorize",
+    cl::desc("Append vectorize enable/disable metadata from polly"),
+    cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
+
 static cl::opt<OMPGeneralSchedulingType, true> XPollyScheduling(
     "polly-scheduling",
     cl::desc("Scheduling type of parallel OpenMP for loops"),
@@ -159,8 +164,19 @@ Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
 
   // Create the loop latch and annotate it as such.
   BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB);
-  if (Annotator)
-    Annotator->annotateLoopLatch(B, NewLoop, Parallel, LoopVectDisabled);
+
+  // Emit no vectorize metadata when neither LoopVectDisabled nor
+  // PollyVectorizeMetadata is set. Set the vectorize metadata to false when
+  // LoopVectDisabled is true; otherwise (only PollyVectorizeMetadata is set)
+  // set it to true.
+  if (Annotator) {
+    std::optional<bool> EnableVectorizeMetadata;
+    if (LoopVectDisabled)
+      EnableVectorizeMetadata = false;
+    else if (PollyVectorizeMetadata)
+      EnableVectorizeMetadata = true;
+    Annotator->annotateLoopLatch(B, Parallel, EnableVectorizeMetadata);
+  }
 
   IV->addIncoming(IncrementedIV, HeaderBB);
   if (GuardBB)
diff --git a/polly/test/CodeGen/Metadata/basic_vec_annotate.ll b/polly/test/CodeGen/Metadata/basic_vec_annotate.ll
new file mode 100644
index 0000000000000..ebe91636ea3cc
--- /dev/null
+++ b/polly/test/CodeGen/Metadata/basic_vec_annotate.ll
@@ -0,0 +1,61 @@
+; RUN: opt %loadNPMPolly -S -passes=polly-codegen -polly-annotate-metadata-vectorize < %s | FileCheck %s
+
+; Basic verification of vectorize metadata getting added when
+; "-polly-annotate-metadata-vectorize" is passed.
+
+; void add(int *A, int *B, int *C,int n) {
+;    for(int i=0; i<n; i++)
+;      C[i] += A[i] + B[i];
+; }
+
+; CHECK: for.body:
+; CHECK: br {{.*}} !llvm.loop [[LOOP:![0-9]+]]
+; CHECK: polly.stmt.for.body:
+; CHECK: br {{.*}} !llvm.loop [[POLLY_LOOP:![0-9]+]]
+; CHECK: [[LOOP]] = distinct !{[[LOOP]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
+; CHECK: [[META3]] = !{!"llvm.loop.vectorize.enable", i32 0}
+; CHECK: [[POLLY_LOOP]] = distinct !{[[POLLY_LOOP]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
+; CHECK: [[META3]] = !{!"llvm.loop.vectorize.enable", i1 true}
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-linux-gnu"
+
+; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable
+define dso_local void @add(ptr nocapture noundef readonly %A, ptr nocapture noundef readonly %B, ptr nocapture noundef %C, i32 noundef %n) local_unnamed_addr #0 {
+entry:
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  %cmp10 = icmp sgt i32 %n, 0
+  br i1 %cmp10, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry.split
+  %wide.trip.count = zext nneg i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry.split
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %1 = load i32, ptr %arrayidx2, align 4
+  %add = add nsw i32 %1, %0
+  %arrayidx4 = getelementptr inbounds i32, ptr %C, i64 %indvars.iv
+  %2 = load i32, ptr %arrayidx4, align 4
+  %add5 = add nsw i32 %add, %2
+  store i32 %add5, ptr %arrayidx4, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body, !llvm.loop !0
+}
+
+attributes #0 = { nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a57" "target-features"="+aes,+crc,+fp-armv8,+neon,+outline-atomics,+perfmon,+sha2,+v8a,-fmv" }
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.mustprogress"}
diff --git a/utils/bazel/configure.bzl b/utils/bazel/configure.bzl
index 717b86d7d6e8a..c5da28845eccf 100644
--- a/utils/bazel/configure.bzl
+++ b/utils/bazel/configure.bzl
@@ -22,6 +22,7 @@ DEFAULT_TARGETS = [
     "PowerPC",
     "RISCV",
     "Sparc",
+    "SPIRV",
     "SystemZ",
     "VE",
     "WebAssembly",
diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
index bfcb53e1f6b09..56dff6b3ad500 100644
--- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
@@ -2222,6 +2222,29 @@ llvm_target_lib_list = [lib for lib in [
             ("-gen-exegesis", "lib/Target/PowerPC/PPCGenExegesis.inc"),
         ],
     },
+    {
+        "name": "RISCV",
+        "short_name": "RISCV",
+        "tbl_outs": [
+            ("-gen-asm-matcher", "lib/Target/RISCV/RISCVGenAsmMatcher.inc"),
+            ("-gen-asm-writer", "lib/Target/RISCV/RISCVGenAsmWriter.inc"),
+            ("-gen-compress-inst-emitter", "lib/Target/RISCV/RISCVGenCompressInstEmitter.inc"),
+            ("-gen-dag-isel", "lib/Target/RISCV/RISCVGenDAGISel.inc"),
+            ("-gen-disassembler", "lib/Target/RISCV/RISCVGenDisassemblerTables.inc"),
+            ("-gen-instr-info", "lib/Target/RISCV/RISCVGenInstrInfo.inc"),
+            ("-gen-macro-fusion-pred", "lib/Target/RISCV/RISCVGenMacroFusion.inc"),
+            ("-gen-emitter", "lib/Target/RISCV/RISCVGenMCCodeEmitter.inc"),
+            ("-gen-pseudo-lowering", "lib/Target/RISCV/RISCVGenMCPseudoLowering.inc"),
+            ("-gen-register-bank", "lib/Target/RISCV/RISCVGenRegisterBank.inc"),
+            ("-gen-register-info", "lib/Target/RISCV/RISCVGenRegisterInfo.inc"),
+            ("-gen-subtarget", "lib/Target/RISCV/RISCVGenSubtargetInfo.inc"),
+            ("-gen-searchable-tables", "lib/Target/RISCV/RISCVGenSearchableTables.inc"),
+            ("-gen-exegesis", "lib/Target/RISCV/RISCVGenExegesis.inc"),
+        ],
+        "tbl_deps": [
+            ":riscv_isel_target_gen",
+        ],
+    },
     {
         "name": "Sparc",
         "short_name": "Sparc",
@@ -2238,6 +2261,21 @@ llvm_target_lib_list = [lib for lib in [
             ("-gen-searchable-tables", "lib/Target/Sparc/SparcGenSearchableTables.inc"),
         ],
     },
+    {
+        "name": "SPIRV",
+        "short_name": "SPIRV",
+        "tbl_outs": [
+            ("-gen-asm-writer", "lib/Target/SPIRV/SPIRVGenAsmWriter.inc"),
+            ("-gen-emitter", "lib/Target/SPIRV/SPIRVGenMCCodeEmitter.inc"),
+            ("-gen-global-isel", "lib/Target/SPIRV/SPIRVGenGlobalISel.inc"),
+            ("-gen-global-isel-combiner -combiners=SPIRVPreLegalizerCombiner", "lib/Target/SPIRV/SPIRVGenPreLegalizeGICombiner.inc"),
+            ("-gen-instr-info", "lib/Target/SPIRV/SPIRVGenInstrInfo.inc"),
+            ("-gen-register-bank", "lib/Target/SPIRV/SPIRVGenRegisterBank.inc"),
+            ("-gen-register-info", "lib/Target/SPIRV/SPIRVGenRegisterInfo.inc"),
+            ("-gen-searchable-tables", "lib/Target/SPIRV/SPIRVGenTables.inc"),
+            ("-gen-subtarget", "lib/Target/SPIRV/SPIRVGenSubtargetInfo.inc"),
+        ],
+    },
     {
         "name": "SystemZ",
         "short_name": "SystemZ",
@@ -2254,29 +2292,6 @@ llvm_target_lib_list = [lib for lib in [
             ("-gen-subtarget", "lib/Target/SystemZ/SystemZGenSubtargetInfo.inc"),
         ],
     },
-    {
-        "name": "RISCV",
-        "short_name": "RISCV",
-        "tbl_outs": [
-            ("-gen-asm-matcher", "lib/Target/RISCV/RISCVGenAsmMatcher.inc"),
-            ("-gen-asm-writer", "lib/Target/RISCV/RISCVGenAsmWriter.inc"),
-            ("-gen-compress-inst-emitter", "lib/Target/RISCV/RISCVGenCompressInstEmitter.inc"),
-            ("-gen-dag-isel", "lib/Target/RISCV/RISCVGenDAGISel.inc"),
-            ("-gen-disassembler", "lib/Target/RISCV/RISCVGenDisassemblerTables.inc"),
-            ("-gen-instr-info", "lib/Target/RISCV/RISCVGenInstrInfo.inc"),
-            ("-gen-macro-fusion-pred", "lib/Target/RISCV/RISCVGenMacroFusion.inc"),
-            ("-gen-emitter", "lib/Target/RISCV/RISCVGenMCCodeEmitter.inc"),
-            ("-gen-pseudo-lowering", "lib/Target/RISCV/RISCVGenMCPseudoLowering.inc"),
-            ("-gen-register-bank", "lib/Target/RISCV/RISCVGenRegisterBank.inc"),
-            ("-gen-register-info", "lib/Target/RISCV/RISCVGenRegisterInfo.inc"),
-            ("-gen-subtarget", "lib/Target/RISCV/RISCVGenSubtargetInfo.inc"),
-            ("-gen-searchable-tables", "lib/Target/RISCV/RISCVGenSearchableTables.inc"),
-            ("-gen-exegesis", "lib/Target/RISCV/RISCVGenExegesis.inc"),
-        ],
-        "tbl_deps": [
-            ":riscv_isel_target_gen",
-        ],
-    },
     {
         "name": "VE",
         "short_name": "VE",
@@ -2557,6 +2572,7 @@ gentbl(
             ":TransformUtils",
             ":Vectorize",
             ":config",
+            ":" + target["name"] + "Analysis",
             ":" + target["name"] + "CommonTableGen",
             ":" + target["name"] + "Info",
             ":" + target["name"] + "UtilsAndDesc",
@@ -2661,6 +2677,24 @@ gentbl(
             ":" + target["name"] + "UtilsAndDesc",
         ],
     )],
+    [cc_library(
+        name = target["name"] + "Analysis",
+        srcs = glob(
+            [
+                "lib/Target/" + target["name"] + "/Analysis/*.cpp",
+                "lib/Target/" + target["name"] + "/Analysis/*.h",
+            ],
+            allow_empty = True,
+        ),
+        copts = llvm_copts,
+        features = ["-layering_check"],
+        deps = [
+            ":Analysis",
+            ":Core",
+            ":Support",
+            ":TransformUtils",
+        ],
+    )],
 ] for target in llvm_target_lib_list]
 
 cc_library(
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 1ec1c4bfad562..092c2de414e36 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -4316,7 +4316,6 @@ cc_library(
         ":GPUToNVVMTransforms",
         ":GPUToROCDLTransforms",
         ":GPUToSPIRV",
-        ":GPUToVulkanTransforms",
         ":IndexToLLVM",
         ":IndexToSPIRV",
         ":LinalgToStandard",
@@ -6182,28 +6181,6 @@ cc_library(
     ],
 )
 
-cc_library(
-    name = "GPUToVulkanTransforms",
-    srcs = [
-        "lib/Conversion/GPUToVulkan/ConvertGPULaunchFuncToVulkanLaunchFunc.cpp",
-        "lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp",
-    ],
-    hdrs = ["include/mlir/Conversion/GPUToVulkan/ConvertGPUToVulkanPass.h"],
-    includes = ["include"],
-    deps = [
-        ":ConversionPassIncGen",
-        ":FuncDialect",
-        ":GPUDialect",
-        ":IR",
-        ":LLVMDialect",
-        ":Pass",
-        ":SPIRVDialect",
-        ":SPIRVSerialization",
-        ":Support",
-        "//llvm:Support",
-    ],
-)
-
 cc_library(
     name = "GPUToGPURuntimeTransforms",
     srcs = [
@@ -9777,7 +9754,6 @@ cc_library(
         ":GPUToNVVMTransforms",
         ":GPUToROCDLTransforms",
         ":GPUToSPIRV",
-        ":GPUToVulkanTransforms",
         ":GPUTransformOps",
         ":GPUTransforms",
         ":IR",
@@ -10224,8 +10200,8 @@ cc_binary(
 
 cc_library(
     name = "VulkanRuntime",
-    srcs = ["tools/mlir-vulkan-runner/VulkanRuntime.cpp"],
-    hdrs = ["tools/mlir-vulkan-runner/VulkanRuntime.h"],
+    srcs = ["lib/ExecutionEngine/VulkanRuntime.cpp"],
+    hdrs = ["lib/ExecutionEngine/VulkanRuntime.h"],
     tags = [
         "manual",  # External dependency
     ],
@@ -10238,7 +10214,7 @@ cc_library(
 
 cc_binary(
     name = "libvulkan-runtime-wrappers.so",
-    srcs = ["tools/mlir-vulkan-runner/vulkan-runtime-wrappers.cpp"],
+    srcs = ["lib/ExecutionEngine/VulkanRuntimeWrappers.cpp"],
     linkshared = True,
     linkstatic = False,
     tags = [
@@ -10247,43 +10223,6 @@ cc_binary(
     deps = [":VulkanRuntime"],
 )
 
-cc_binary(
-    name = "mlir-vulkan-runner",
-    srcs = ["tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp"],
-    deps = [
-        ":ArithDialect",
-        ":ArithToLLVM",
-        ":BuiltinToLLVMIRTranslation",
-        ":ControlFlowToLLVM",
-        ":ConvertToSPIRV",
-        ":ExecutionEngineUtils",
-        ":FuncDialect",
-        ":FuncToLLVM",
-        ":FuncToSPIRV",
-        ":GPUDialect",
-        ":GPUToSPIRV",
-        ":GPUToVulkanTransforms",
-        ":GPUTransforms",
-        ":LLVMCommonConversion",
-        ":LLVMDialect",
-        ":LLVMIRTransforms",
-        ":LLVMToLLVMIRTranslation",
-        ":MemRefDialect",
-        ":MemRefToLLVM",
-        ":MemRefTransforms",
-        ":MlirJitRunner",
-        ":Pass",
-        ":ReconcileUnrealizedCasts",
-        ":SCFDialect",
-        ":SPIRVDialect",
-        ":SPIRVTransforms",
-        ":ToLLVMIRTranslation",
-        ":VectorDialect",
-        ":VectorToLLVM",
-        "//llvm:Support",
-    ],
-)
-
 cc_library(
     name = "TableGen",
     srcs = glob(["lib/TableGen/*.cpp"]),